{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:00:08.069606', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 68890406.29865518, 'timestamp': '2025-09-30 23:00:08.085312', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.179383', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 0.7998734712600708, 'timestamp': '2025-09-30 23:00:08.193574', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.280498', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 0.9404081702232361, 'timestamp': '2025-09-30 23:00:08.292923', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.378058', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 0.9672470688819885, 'timestamp': '2025-09-30 23:00:08.382248', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.446992', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.8534132838249207, 'timestamp': '2025-09-30 23:00:08.492336', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.579046', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.5749672651290894, 'timestamp': '2025-09-30 23:00:08.591334', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.675654', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.5174691677093506, 'timestamp': '2025-09-30 23:00:08.679069', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.743712', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.642789363861084, 'timestamp': '2025-09-30 23:00:08.754927', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:08.841058', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.4763503670692444, 'timestamp': '2025-09-30 23:00:08.849793', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:08.935795', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.16755951941013336, 'timestamp': '2025-09-30 23:00:08.940566', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:09.044297', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.1864229291677475, 'timestamp': '2025-09-30 23:00:09.050123', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.110444', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.30102822184562683, 'timestamp': '2025-09-30 23:00:09.113724', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.204786', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.23041369020938873, 'timestamp': '2025-09-30 23:00:09.212626', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:09.293680', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.0712369754910469, 'timestamp': '2025-09-30 23:00:09.307937', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.379530', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.10727699100971222, 'timestamp': '2025-09-30 23:00:09.394456', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.477468', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.08819956332445145, 'timestamp': '2025-09-30 23:00:09.491014', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.568437', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.1015678271651268, 'timestamp': '2025-09-30 23:00:09.585675', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.671166', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.03678148239850998, 'timestamp': '2025-09-30 23:00:09.684691', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:09.752261', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.039659857749938965, 'timestamp': '2025-09-30 23:00:09.756653', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.819776', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.07218135893344879, 'timestamp': '2025-09-30 23:00:09.831073', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:09.913203', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.06595972180366516, 'timestamp': '2025-09-30 23:00:09.921178', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:09.989755', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.04808490350842476, 'timestamp': '2025-09-30 23:00:09.999616', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.059207', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.06911224871873856, 'timestamp': '2025-09-30 23:00:10.062612', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.130532', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.04805942624807358, 'timestamp': '2025-09-30 23:00:10.135989', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.202669', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.04779454693198204, 'timestamp': '2025-09-30 23:00:10.211360', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.279246', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.0667390450835228, 'timestamp': '2025-09-30 23:00:10.282767', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.346710', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.04412480816245079, 'timestamp': '2025-09-30 23:00:10.351930', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:10.424778', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.05051908269524574, 'timestamp': '2025-09-30 23:00:10.428264', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.509929', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.03780743479728699, 'timestamp': '2025-09-30 23:00:10.518029', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.601157', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.0232723169028759, 'timestamp': '2025-09-30 23:00:10.604823', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.664398', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.04821785166859627, 'timestamp': '2025-09-30 23:00:10.668340', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.734238', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.04990493506193161, 'timestamp': '2025-09-30 23:00:10.746048', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.816344', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.05117885395884514, 'timestamp': '2025-09-30 23:00:10.832368', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:10.913427', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.05794329568743706, 'timestamp': '2025-09-30 23:00:10.924248', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:10.997057', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.060359012335538864, 'timestamp': '2025-09-30 23:00:10.999853', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.072784', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.04033714160323143, 'timestamp': '2025-09-30 23:00:11.084961', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.162053', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.052116818726062775, 'timestamp': '2025-09-30 23:00:11.177261', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.251513', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.044982727617025375, 'timestamp': '2025-09-30 23:00:11.257900', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.335817', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.04954562708735466, 'timestamp': '2025-09-30 23:00:11.347158', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.424148', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.028258560225367546, 'timestamp': '2025-09-30 23:00:11.434098', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.512383', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.056203871965408325, 'timestamp': '2025-09-30 23:00:11.524750', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.588515', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.05110443755984306, 'timestamp': '2025-09-30 23:00:11.592759', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:11.659749', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.037735868245363235, 'timestamp': '2025-09-30 23:00:11.664274', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.728055', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.05166834220290184, 'timestamp': '2025-09-30 23:00:11.739043', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.813424', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.05635031312704086, 'timestamp': '2025-09-30 23:00:11.823130', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:11.882660', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.03450925648212433, 'timestamp': '2025-09-30 23:00:11.885932', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:11.956548', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.051866352558135986, 'timestamp': '2025-09-30 23:00:11.962331', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:12.022633', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.038861773908138275, 'timestamp': '2025-09-30 23:00:12.033797', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:12.110684', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.06296918541193008, 'timestamp': '2025-09-30 23:00:12.118677', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:12.190660', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.0499274842441082, 'timestamp': '2025-09-30 23:00:12.204026', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:12.295782', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.04274194687604904, 'timestamp': '2025-09-30 23:00:12.312103', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:12.393875', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.05967700481414795, 'timestamp': '2025-09-30 23:00:12.410265', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:00:12.501691', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.041017793118953705, 'timestamp': '2025-09-30 23:00:12.511099', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:12.597980', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.09592173248529434, 'timestamp': '2025-09-30 23:00:12.614622', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:12.683536', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.03112044557929039, 'timestamp': '2025-09-30 23:00:12.696334', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:12.769876', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.043773945420980453, 'timestamp': '2025-09-30 23:00:12.783021', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:12.874067', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.05526348575949669, 'timestamp': '2025-09-30 23:00:12.888943', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:12.976289', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.017687706276774406, 'timestamp': '2025-09-30 23:00:12.980231', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.060569', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.04532860964536667, 'timestamp': '2025-09-30 23:00:13.065270', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:13.124432', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.04826122522354126, 'timestamp': '2025-09-30 23:00:13.138389', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.216082', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.10781341046094894, 'timestamp': '2025-09-30 23:00:13.234685', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.314116', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.025772469118237495, 'timestamp': '2025-09-30 23:00:13.318388', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:13.386431', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.05523237958550453, 'timestamp': '2025-09-30 23:00:13.391665', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.481337', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.06331472843885422, 'timestamp': '2025-09-30 23:00:13.485830', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.580862', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.054480038583278656, 'timestamp': '2025-09-30 23:00:13.589372', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.649139', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.06396270543336868, 'timestamp': '2025-09-30 23:00:13.667045', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.733401', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.02834959700703621, 'timestamp': '2025-09-30 23:00:13.750230', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.822525', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.0301390141248703, 'timestamp': '2025-09-30 23:00:13.828203', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:13.913883', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.06922207772731781, 'timestamp': '2025-09-30 23:00:13.922634', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:13.992159', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.11976716667413712, 'timestamp': '2025-09-30 23:00:14.004940', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.086195', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.056032322347164154, 'timestamp': '2025-09-30 23:00:14.096589', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.166563', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.04668962582945824, 'timestamp': '2025-09-30 23:00:14.172882', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:14.248401', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.019045470282435417, 'timestamp': '2025-09-30 23:00:14.257213', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.318362', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.05310192331671715, 'timestamp': '2025-09-30 23:00:14.329547', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.409071', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.06881994754076004, 'timestamp': '2025-09-30 23:00:14.418020', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:14.502631', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.040872786194086075, 'timestamp': '2025-09-30 23:00:14.506642', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.584168', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.04500585049390793, 'timestamp': '2025-09-30 23:00:14.595510', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.660542', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.06929098069667816, 'timestamp': '2025-09-30 23:00:14.665665', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.722319', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.0922447219491005, 'timestamp': '2025-09-30 23:00:14.729262', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:14.787186', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.07087849825620651, 'timestamp': '2025-09-30 23:00:14.794604', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.855389', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.03644782304763794, 'timestamp': '2025-09-30 23:00:14.862472', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:00:14.921049', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.03366159647703171, 'timestamp': '2025-09-30 23:00:14.932841', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:14.991911', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.05326094478368759, 'timestamp': '2025-09-30 23:00:14.998508', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.068669', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.060151346027851105, 'timestamp': '2025-09-30 23:00:15.072938', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.146330', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.04084090143442154, 'timestamp': '2025-09-30 23:00:15.154556', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.228861', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.04253407567739487, 'timestamp': '2025-09-30 23:00:15.232356', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.289871', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.033624839037656784, 'timestamp': '2025-09-30 23:00:15.296938', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.366552', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.08021355420351028, 'timestamp': '2025-09-30 23:00:15.374071', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.445704', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.04390791803598404, 'timestamp': '2025-09-30 23:00:15.452302', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.517237', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.029049325734376907, 'timestamp': '2025-09-30 23:00:15.523463', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:15.591704', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.05493747070431709, 'timestamp': '2025-09-30 23:00:15.595442', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:15.670291', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.033449191600084305, 'timestamp': '2025-09-30 23:00:15.675838', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.739041', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.032521553337574005, 'timestamp': '2025-09-30 23:00:15.747810', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.827736', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.040845539420843124, 'timestamp': '2025-09-30 23:00:15.843410', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.914729', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.04625949263572693, 'timestamp': '2025-09-30 23:00:15.919879', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:15.986002', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.05198773369193077, 'timestamp': '2025-09-30 23:00:15.988880', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.048360', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.04677612707018852, 'timestamp': '2025-09-30 23:00:16.061256', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:16.123332', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.05732059106230736, 'timestamp': '2025-09-30 23:00:16.138048', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.214615', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.03370558097958565, 'timestamp': '2025-09-30 23:00:16.218989', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.284648', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.0359845794737339, 'timestamp': '2025-09-30 23:00:16.292797', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:16.375878', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.04939213767647743, 'timestamp': '2025-09-30 23:00:16.382350', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.449394', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.0335795134305954, 'timestamp': '2025-09-30 23:00:16.452560', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.525839', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.03215102106332779, 'timestamp': '2025-09-30 23:00:16.534214', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:16.596584', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.03926919028162956, 'timestamp': '2025-09-30 23:00:16.599369', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.674341', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.044499222189188004, 'timestamp': '2025-09-30 23:00:16.681083', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:16.742292', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.015477331355214119, 'timestamp': '2025-09-30 23:00:16.747912', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.810423', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.04444548487663269, 'timestamp': '2025-09-30 23:00:16.816772', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.872586', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.03392120823264122, 'timestamp': '2025-09-30 23:00:16.881621', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:16.941617', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.05934634059667587, 'timestamp': '2025-09-30 23:00:16.947356', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.013399', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.03630489856004715, 'timestamp': '2025-09-30 23:00:17.020098', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.089749', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.056694395840168, 'timestamp': '2025-09-30 23:00:17.097951', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:17.163753', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.05559452995657921, 'timestamp': '2025-09-30 23:00:17.171223', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.244403', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.040355172008275986, 'timestamp': '2025-09-30 23:00:17.254138', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.312835', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.03585350885987282, 'timestamp': '2025-09-30 23:00:17.318732', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:17.387872', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.07106844335794449, 'timestamp': '2025-09-30 23:00:17.391215', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.448703', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.07420240342617035, 'timestamp': '2025-09-30 23:00:17.455605', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.520527', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.052086181938648224, 'timestamp': '2025-09-30 23:00:17.530532', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:17.591205', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.05115408077836037, 'timestamp': '2025-09-30 23:00:17.594868', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.661639', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.037365980446338654, 'timestamp': '2025-09-30 23:00:17.664918', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.721254', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.040491968393325806, 'timestamp': '2025-09-30 23:00:17.729093', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:17.793444', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.03324684873223305, 'timestamp': '2025-09-30 23:00:17.805230', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.860729', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.024504346773028374, 'timestamp': '2025-09-30 23:00:17.869538', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:17.932308', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.02507551945745945, 'timestamp': '2025-09-30 23:00:17.938604', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.002197', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.057266999036073685, 'timestamp': '2025-09-30 23:00:18.007677', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.070297', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.0278331208974123, 'timestamp': '2025-09-30 23:00:18.076985', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.140226', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.07162158191204071, 'timestamp': '2025-09-30 23:00:18.148902', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.212348', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.01984097622334957, 'timestamp': '2025-09-30 23:00:18.215624', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.274518', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.04594317823648453, 'timestamp': '2025-09-30 23:00:18.278321', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.346935', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.06484483927488327, 'timestamp': '2025-09-30 23:00:18.354696', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.427889', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.053895317018032074, 'timestamp': '2025-09-30 23:00:18.431556', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.495824', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.03414193540811539, 'timestamp': '2025-09-30 23:00:18.500053', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:18.574349', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.050539761781692505, 'timestamp': '2025-09-30 23:00:18.577875', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.646416', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.03900628909468651, 'timestamp': '2025-09-30 23:00:18.657959', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.728593', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.013866743072867393, 'timestamp': '2025-09-30 23:00:18.736899', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.800518', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.010283105075359344, 'timestamp': '2025-09-30 23:00:18.804633', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.870089', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.034226853400468826, 'timestamp': '2025-09-30 23:00:18.874329', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:18.930184', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.02886459417641163, 'timestamp': '2025-09-30 23:00:18.941474', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:19.018565', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.039596762508153915, 'timestamp': '2025-09-30 23:00:19.021706', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.094271', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.023388033732771873, 'timestamp': '2025-09-30 23:00:19.103230', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:19.187585', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.042615871876478195, 'timestamp': '2025-09-30 23:00:19.192110', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.263643', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.00893484614789486, 'timestamp': '2025-09-30 23:00:19.271696', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.346017', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.01848899945616722, 'timestamp': '2025-09-30 23:00:19.349604', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.414764', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.07536923885345459, 'timestamp': '2025-09-30 23:00:19.419101', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.494716', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.019445979967713356, 'timestamp': '2025-09-30 23:00:19.511850', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.609961', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.01882864162325859, 'timestamp': '2025-09-30 23:00:19.629655', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.689985', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.02295784279704094, 'timestamp': '2025-09-30 23:00:19.701209', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.795356', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.017182616516947746, 'timestamp': '2025-09-30 23:00:19.811504', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.895287', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.05131826549768448, 'timestamp': '2025-09-30 23:00:19.910936', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:19.981637', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.0028823204338550568, 'timestamp': '2025-09-30 23:00:19.999931', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:20.085842', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.017770150676369667, 'timestamp': '2025-09-30 23:00:20.096395', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:20.184964', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.01574380323290825, 'timestamp': '2025-09-30 23:00:20.190142', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:20.268757', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.05715281888842583, 'timestamp': '2025-09-30 23:00:20.272831', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:20.343465', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.034892357885837555, 'timestamp': '2025-09-30 23:00:20.358404', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:00:25.261628', 'step': 152, 'epoch': 1} {'type': 'pplx', 'content': 4430028.1106051, 'timestamp': '2025-09-30 23:00:25.269615', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:25.327445', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.07843448966741562, 'timestamp': '2025-09-30 23:00:25.330163', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:25.393826', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.06045497581362724, 'timestamp': '2025-09-30 23:00:25.397696', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:25.459204', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.047354038804769516, 'timestamp': '2025-09-30 23:00:25.467411', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:25.541914', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.08155776560306549, 'timestamp': '2025-09-30 23:00:25.560615', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:25.658879', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.003923104610294104, 'timestamp': '2025-09-30 23:00:25.663859', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:25.735470', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.007102813106030226, 'timestamp': '2025-09-30 23:00:25.752032', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:25.836002', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.01105030532926321, 'timestamp': '2025-09-30 23:00:25.845192', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:25.909800', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.04894297569990158, 'timestamp': '2025-09-30 23:00:25.921889', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:25.980489', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.05882345512509346, 'timestamp': '2025-09-30 23:00:25.984983', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.040562', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.019697681069374084, 'timestamp': '2025-09-30 23:00:26.043495', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.099179', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.013767341151833534, 'timestamp': '2025-09-30 23:00:26.102175', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.157023', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.04042845219373703, 'timestamp': '2025-09-30 23:00:26.166604', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:26.225620', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.02323838137090206, 'timestamp': '2025-09-30 23:00:26.227945', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:26.286598', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.08872615545988083, 'timestamp': '2025-09-30 23:00:26.289975', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.344920', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.030436864122748375, 'timestamp': '2025-09-30 23:00:26.349089', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:26.411114', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.05532066151499748, 'timestamp': '2025-09-30 23:00:26.417712', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:26.478662', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.04502435773611069, 'timestamp': '2025-09-30 23:00:26.483795', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.545851', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.025984736159443855, 'timestamp': '2025-09-30 23:00:26.551974', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.613987', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.04040054231882095, 'timestamp': '2025-09-30 23:00:26.621747', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:26.682773', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.0711660236120224, 'timestamp': '2025-09-30 23:00:26.698683', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.769100', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.028761401772499084, 'timestamp': '2025-09-30 23:00:26.785258', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.870916', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.04251887649297714, 'timestamp': '2025-09-30 23:00:26.889602', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:26.988222', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.0473906584084034, 'timestamp': '2025-09-30 23:00:27.003002', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.088184', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.027712734416127205, 'timestamp': '2025-09-30 23:00:27.096020', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.171042', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.038203634321689606, 'timestamp': '2025-09-30 23:00:27.179465', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.242880', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.028391823172569275, 'timestamp': '2025-09-30 23:00:27.248547', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.329186', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.007985183969140053, 'timestamp': '2025-09-30 23:00:27.338093', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.404610', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.049132682383060455, 'timestamp': '2025-09-30 23:00:27.415584', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:27.485748', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.01196136511862278, 'timestamp': '2025-09-30 23:00:27.490727', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.549478', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.012185721658170223, 'timestamp': '2025-09-30 23:00:27.556429', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.611860', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.05655736103653908, 'timestamp': '2025-09-30 23:00:27.615477', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:27.672827', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.013233363628387451, 'timestamp': '2025-09-30 23:00:27.682369', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.740243', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.04839680343866348, 'timestamp': '2025-09-30 23:00:27.745006', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.811413', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.019668450579047203, 'timestamp': '2025-09-30 23:00:27.815505', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.874392', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.024808179587125778, 'timestamp': '2025-09-30 23:00:27.878219', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:27.934793', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.07156997919082642, 'timestamp': '2025-09-30 23:00:27.944431', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.006961', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.026056746020913124, 'timestamp': '2025-09-30 23:00:28.009722', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.076182', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.027650464326143265, 'timestamp': '2025-09-30 23:00:28.083753', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.153734', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.05083713307976723, 'timestamp': '2025-09-30 23:00:28.160526', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.226568', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.052865780889987946, 'timestamp': '2025-09-30 23:00:28.238213', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.304761', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.049188610166311264, 'timestamp': '2025-09-30 23:00:28.308029', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.375155', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.03984120115637779, 'timestamp': '2025-09-30 23:00:28.380962', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.447441', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.03224222734570503, 'timestamp': '2025-09-30 23:00:28.456819', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.527850', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.04354199767112732, 'timestamp': '2025-09-30 23:00:28.537190', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:28.601167', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.04223957285284996, 'timestamp': '2025-09-30 23:00:28.608479', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.677008', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.044709157198667526, 'timestamp': '2025-09-30 23:00:28.682761', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.753613', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.06473253667354584, 'timestamp': '2025-09-30 23:00:28.761563', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.823628', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.06478424370288849, 'timestamp': '2025-09-30 23:00:28.833198', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:28.910234', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.02384883165359497, 'timestamp': '2025-09-30 23:00:28.914170', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:28.972372', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.027753910049796104, 'timestamp': '2025-09-30 23:00:28.975079', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.031604', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.03351321443915367, 'timestamp': '2025-09-30 23:00:29.034206', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.090709', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.02594158612191677, 'timestamp': '2025-09-30 23:00:29.099069', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.167351', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.013573492877185345, 'timestamp': '2025-09-30 23:00:29.174053', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.241187', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.03929531201720238, 'timestamp': '2025-09-30 23:00:29.243906', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.298722', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.03946998715400696, 'timestamp': '2025-09-30 23:00:29.302377', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.365381', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.030716735869646072, 'timestamp': '2025-09-30 23:00:29.374811', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.437900', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.045077573508024216, 'timestamp': '2025-09-30 23:00:29.440189', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.495806', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.02477983944118023, 'timestamp': '2025-09-30 23:00:29.499000', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.557073', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.07371436804533005, 'timestamp': '2025-09-30 23:00:29.559878', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.613700', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.027524704113602638, 'timestamp': '2025-09-30 23:00:29.620009', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:29.675754', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.07367073744535446, 'timestamp': '2025-09-30 23:00:29.680887', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.746050', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.06713929027318954, 'timestamp': '2025-09-30 23:00:29.749709', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:29.812233', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.06317406892776489, 'timestamp': '2025-09-30 23:00:29.816313', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.879336', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.026524996384978294, 'timestamp': '2025-09-30 23:00:29.889844', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:29.963368', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.03202807903289795, 'timestamp': '2025-09-30 23:00:29.972786', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.031220', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.05523526296019554, 'timestamp': '2025-09-30 23:00:30.047715', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.113812', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.044144392013549805, 'timestamp': '2025-09-30 23:00:30.116375', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.176718', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.07082711160182953, 'timestamp': '2025-09-30 23:00:30.184207', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:30.241638', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.021938597783446312, 'timestamp': '2025-09-30 23:00:30.244264', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.305365', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.03962632268667221, 'timestamp': '2025-09-30 23:00:30.309135', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.366027', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.05583740398287773, 'timestamp': '2025-09-30 23:00:30.369348', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.424008', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.038421809673309326, 'timestamp': '2025-09-30 23:00:30.430526', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:30.483649', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.05382268503308296, 'timestamp': '2025-09-30 23:00:30.486936', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.542334', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.034658078104257584, 'timestamp': '2025-09-30 23:00:30.545327', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:30.600510', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.06014106422662735, 'timestamp': '2025-09-30 23:00:30.604070', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.663925', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.043382707983255386, 'timestamp': '2025-09-30 23:00:30.671530', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:30.731039', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.021725963801145554, 'timestamp': '2025-09-30 23:00:30.735153', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.824009', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.04094424098730087, 'timestamp': '2025-09-30 23:00:30.827510', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:30.894251', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.01932612620294094, 'timestamp': '2025-09-30 23:00:30.906827', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:30.985276', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.02510308101773262, 'timestamp': '2025-09-30 23:00:30.997147', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.070310', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.04681074619293213, 'timestamp': '2025-09-30 23:00:31.075629', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.141900', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.032041095197200775, 'timestamp': '2025-09-30 23:00:31.153470', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:31.221760', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.0544588677585125, 'timestamp': '2025-09-30 23:00:31.225886', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.283316', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.012723918072879314, 'timestamp': '2025-09-30 23:00:31.290439', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:31.359628', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.026182297617197037, 'timestamp': '2025-09-30 23:00:31.371279', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.450952', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.013870901428163052, 'timestamp': '2025-09-30 23:00:31.458895', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.518528', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.03757297247648239, 'timestamp': '2025-09-30 23:00:31.522938', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.589169', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.029261674731969833, 'timestamp': '2025-09-30 23:00:31.599376', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.662139', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.009052173234522343, 'timestamp': '2025-09-30 23:00:31.665110', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.733725', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.049832701683044434, 'timestamp': '2025-09-30 23:00:31.743079', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:31.806203', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.033734243363142014, 'timestamp': '2025-09-30 23:00:31.810497', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.873089', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.03354579955339432, 'timestamp': '2025-09-30 23:00:31.884951', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:31.946243', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.05205748602747917, 'timestamp': '2025-09-30 23:00:31.953333', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.012478', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.036425214260816574, 'timestamp': '2025-09-30 23:00:32.020510', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.088752', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.046603091061115265, 'timestamp': '2025-09-30 23:00:32.098502', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:32.179545', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.04475140571594238, 'timestamp': '2025-09-30 23:00:32.195447', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.267112', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.034931641072034836, 'timestamp': '2025-09-30 23:00:32.276599', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.345110', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.04183543473482132, 'timestamp': '2025-09-30 23:00:32.348834', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:32.429819', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.03927816078066826, 'timestamp': '2025-09-30 23:00:32.439899', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.510906', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.0620463602244854, 'timestamp': '2025-09-30 23:00:32.524081', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:32.598377', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.10250679403543472, 'timestamp': '2025-09-30 23:00:32.602093', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.668539', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.02490657940506935, 'timestamp': '2025-09-30 23:00:32.673601', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.748917', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.03238468989729881, 'timestamp': '2025-09-30 23:00:32.757021', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.824902', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.05367797613143921, 'timestamp': '2025-09-30 23:00:32.836818', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:32.900298', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.025845065712928772, 'timestamp': '2025-09-30 23:00:32.910319', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:32.980209', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.0255956519395113, 'timestamp': '2025-09-30 23:00:32.990553', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.044680', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.007359130773693323, 'timestamp': '2025-09-30 23:00:33.047466', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.117058', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.04279519245028496, 'timestamp': '2025-09-30 23:00:33.124945', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.186010', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.04093652591109276, 'timestamp': '2025-09-30 23:00:33.190186', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.257178', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.023653512820601463, 'timestamp': '2025-09-30 23:00:33.261235', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.325628', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.03574162349104881, 'timestamp': '2025-09-30 23:00:33.337080', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.415176', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.053099747747182846, 'timestamp': '2025-09-30 23:00:33.430123', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.512876', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.030250612646341324, 'timestamp': '2025-09-30 23:00:33.521393', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:33.582805', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.006395229138433933, 'timestamp': '2025-09-30 23:00:33.591099', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.664203', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.01223982684314251, 'timestamp': '2025-09-30 23:00:33.669324', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.744533', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.03102569282054901, 'timestamp': '2025-09-30 23:00:33.752287', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.821507', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.0220167338848114, 'timestamp': '2025-09-30 23:00:33.831106', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.912145', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.056931138038635254, 'timestamp': '2025-09-30 23:00:33.920787', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:33.994589', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.007207236252725124, 'timestamp': '2025-09-30 23:00:33.999261', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.060274', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.030676644295454025, 'timestamp': '2025-09-30 23:00:34.078932', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:34.159524', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.015053967013955116, 'timestamp': '2025-09-30 23:00:34.164325', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.239680', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.0435713566839695, 'timestamp': '2025-09-30 23:00:34.254738', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.328945', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.03271092101931572, 'timestamp': '2025-09-30 23:00:34.335108', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:34.392893', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.012340398505330086, 'timestamp': '2025-09-30 23:00:34.404068', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:34.471038', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.11700022965669632, 'timestamp': '2025-09-30 23:00:34.479441', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.558259', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.03424360230565071, 'timestamp': '2025-09-30 23:00:34.561521', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.621409', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.08968072384595871, 'timestamp': '2025-09-30 23:00:34.630108', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:34.692836', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.0450151227414608, 'timestamp': '2025-09-30 23:00:34.700773', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.776804', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.01640566997230053, 'timestamp': '2025-09-30 23:00:34.784049', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.850306', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.07612261921167374, 'timestamp': '2025-09-30 23:00:34.854872', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.914497', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.023985860869288445, 'timestamp': '2025-09-30 23:00:34.926494', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:34.996132', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.0372890867292881, 'timestamp': '2025-09-30 23:00:35.012508', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:35.096676', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.02812122367322445, 'timestamp': '2025-09-30 23:00:35.115070', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.184401', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.02819949761033058, 'timestamp': '2025-09-30 23:00:35.188821', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.255789', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.0070059336721897125, 'timestamp': '2025-09-30 23:00:35.263261', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.335091', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.030421504750847816, 'timestamp': '2025-09-30 23:00:35.343212', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.416648', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.041787873953580856, 'timestamp': '2025-09-30 23:00:35.422070', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.493672', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.03151236101984978, 'timestamp': '2025-09-30 23:00:35.500712', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.575196', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.045264460146427155, 'timestamp': '2025-09-30 23:00:35.582300', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.659824', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.08274871110916138, 'timestamp': '2025-09-30 23:00:35.673166', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.743154', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.05356982722878456, 'timestamp': '2025-09-30 23:00:35.750369', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.811380', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.06746991723775864, 'timestamp': '2025-09-30 23:00:35.821067', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:35.888642', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.06582744419574738, 'timestamp': '2025-09-30 23:00:35.896302', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:35.963733', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.03643546625971794, 'timestamp': '2025-09-30 23:00:35.976293', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.043853', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.042392462491989136, 'timestamp': '2025-09-30 23:00:36.053619', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.132112', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.036272112280130386, 'timestamp': '2025-09-30 23:00:36.143376', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.215941', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.042991604655981064, 'timestamp': '2025-09-30 23:00:36.222838', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.317238', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.030390677973628044, 'timestamp': '2025-09-30 23:00:36.324090', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.407714', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.025473596528172493, 'timestamp': '2025-09-30 23:00:36.410487', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.473338', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.05824188143014908, 'timestamp': '2025-09-30 23:00:36.481357', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.550782', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.034269627183675766, 'timestamp': '2025-09-30 23:00:36.553610', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:36.610899', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.026858842000365257, 'timestamp': '2025-09-30 23:00:36.624425', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:00:42.211878', 'step': 304, 'epoch': 1} {'type': 'pplx', 'content': 4889051.427982028, 'timestamp': '2025-09-30 23:00:42.217571', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:42.273735', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.05650978907942772, 'timestamp': '2025-09-30 23:00:42.288852', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:42.357632', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.052499569952487946, 'timestamp': '2025-09-30 23:00:42.371478', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:42.465013', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.03191816434264183, 'timestamp': '2025-09-30 23:00:42.481178', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:42.554642', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.026577932760119438, 'timestamp': '2025-09-30 23:00:42.567145', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:42.662699', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.03243381902575493, 'timestamp': '2025-09-30 23:00:42.666918', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:42.742272', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.030497489497065544, 'timestamp': '2025-09-30 23:00:42.745525', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:42.856288', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.05511416867375374, 'timestamp': '2025-09-30 23:00:42.860061', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:42.944528', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.02289014495909214, 'timestamp': '2025-09-30 23:00:42.960084', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.038097', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.04719844087958336, 'timestamp': '2025-09-30 23:00:43.041715', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:43.113013', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.04035595431923866, 'timestamp': '2025-09-30 23:00:43.128066', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.208840', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.01712874509394169, 'timestamp': '2025-09-30 23:00:43.213431', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.292433', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.021069535985589027, 'timestamp': '2025-09-30 23:00:43.299978', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:43.369374', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.023909414187073708, 'timestamp': '2025-09-30 23:00:43.374072', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.458011', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.05216992273926735, 'timestamp': '2025-09-30 23:00:43.465304', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.534447', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.024300476536154747, 'timestamp': '2025-09-30 23:00:43.540378', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.617044', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.02711714431643486, 'timestamp': '2025-09-30 23:00:43.627708', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.704770', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.04972752183675766, 'timestamp': '2025-09-30 23:00:43.708625', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:43.795403', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.044416170567274094, 'timestamp': '2025-09-30 23:00:43.799974', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:43.877415', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.02953939326107502, 'timestamp': '2025-09-30 23:00:43.886424', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:43.964352', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.019474023953080177, 'timestamp': '2025-09-30 23:00:43.974168', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:44.035809', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.051965344697237015, 'timestamp': '2025-09-30 23:00:44.047622', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.121405', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.04470045492053032, 'timestamp': '2025-09-30 23:00:44.125479', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.200396', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.01784358359873295, 'timestamp': '2025-09-30 23:00:44.204259', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.280125', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.02985723689198494, 'timestamp': '2025-09-30 23:00:44.287140', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.366216', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.04419001191854477, 'timestamp': '2025-09-30 23:00:44.370244', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.447414', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.042204588651657104, 'timestamp': '2025-09-30 23:00:44.463832', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.521711', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.021355343982577324, 'timestamp': '2025-09-30 23:00:44.527597', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:44.605675', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.05255729332566261, 'timestamp': '2025-09-30 23:00:44.615458', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.692027', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.02229163609445095, 'timestamp': '2025-09-30 23:00:44.698432', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.766584', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.050388991832733154, 'timestamp': '2025-09-30 23:00:44.769728', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.839696', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.03574323654174805, 'timestamp': '2025-09-30 23:00:44.842286', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:44.921387', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.04174404591321945, 'timestamp': '2025-09-30 23:00:44.935747', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.009157', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.048843443393707275, 'timestamp': '2025-09-30 23:00:45.013713', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.089345', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.040590766817331314, 'timestamp': '2025-09-30 23:00:45.098326', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.182087', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.05963724106550217, 'timestamp': '2025-09-30 23:00:45.194270', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:45.276198', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.02483440563082695, 'timestamp': '2025-09-30 23:00:45.282829', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.371003', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.007522960659116507, 'timestamp': '2025-09-30 23:00:45.381094', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.471357', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.03834037855267525, 'timestamp': '2025-09-30 23:00:45.475822', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.564833', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.05275029316544533, 'timestamp': '2025-09-30 23:00:45.567802', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.643939', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.02335212007164955, 'timestamp': '2025-09-30 23:00:45.657682', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:45.735164', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.031245727092027664, 'timestamp': '2025-09-30 23:00:45.752282', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.846927', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.039275120943784714, 'timestamp': '2025-09-30 23:00:45.852402', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:45.934890', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.049076419323682785, 'timestamp': '2025-09-30 23:00:45.951688', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.023300', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.04472209885716438, 'timestamp': '2025-09-30 23:00:46.034517', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.113925', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.009678775444626808, 'timestamp': '2025-09-30 23:00:46.128425', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.214791', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.04840783029794693, 'timestamp': '2025-09-30 23:00:46.229547', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.313837', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.05114486441016197, 'timestamp': '2025-09-30 23:00:46.316908', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.372817', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.04566707834601402, 'timestamp': '2025-09-30 23:00:46.379773', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.466371', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.05207475274801254, 'timestamp': '2025-09-30 23:00:46.470034', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.546015', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.041106607764959335, 'timestamp': '2025-09-30 23:00:46.558428', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:46.635839', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.03940912336111069, 'timestamp': '2025-09-30 23:00:46.647314', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.722558', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.07159552723169327, 'timestamp': '2025-09-30 23:00:46.740010', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.824728', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.029124855995178223, 'timestamp': '2025-09-30 23:00:46.838354', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:46.937057', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.027898002415895462, 'timestamp': '2025-09-30 23:00:46.949037', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.035952', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.036239683628082275, 'timestamp': '2025-09-30 23:00:47.046309', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.132625', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.04503398761153221, 'timestamp': '2025-09-30 23:00:47.139894', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.197751', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.029047559946775436, 'timestamp': '2025-09-30 23:00:47.208840', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.268820', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.06254284083843231, 'timestamp': '2025-09-30 23:00:47.274280', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:47.359051', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.04584518447518349, 'timestamp': '2025-09-30 23:00:47.372026', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.462084', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.052903808653354645, 'timestamp': '2025-09-30 23:00:47.470470', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.542395', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.029773814603686333, 'timestamp': '2025-09-30 23:00:47.547367', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.628016', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.044253136962652206, 'timestamp': '2025-09-30 23:00:47.631716', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.693144', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.039406757801771164, 'timestamp': '2025-09-30 23:00:47.696678', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.770177', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.016734536737203598, 'timestamp': '2025-09-30 23:00:47.786282', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:47.869404', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.030914796516299248, 'timestamp': '2025-09-30 23:00:47.877546', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:47.940745', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.03591708466410637, 'timestamp': '2025-09-30 23:00:47.953242', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.037386', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.050253190100193024, 'timestamp': '2025-09-30 23:00:48.047384', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.121880', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.03427679464221001, 'timestamp': '2025-09-30 23:00:48.140144', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.208373', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.014128180220723152, 'timestamp': '2025-09-30 23:00:48.211519', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.297523', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.010808355174958706, 'timestamp': '2025-09-30 23:00:48.310801', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.382384', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.017306286841630936, 'timestamp': '2025-09-30 23:00:48.385765', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.445249', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.045016031712293625, 'timestamp': '2025-09-30 23:00:48.461439', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.535879', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.019429802894592285, 'timestamp': '2025-09-30 23:00:48.539700', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.619237', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.017051788046956062, 'timestamp': '2025-09-30 23:00:48.633586', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.723014', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.033711206167936325, 'timestamp': '2025-09-30 23:00:48.726869', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.801906', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.027453159913420677, 'timestamp': '2025-09-30 23:00:48.817850', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.901664', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.017395824193954468, 'timestamp': '2025-09-30 23:00:48.905190', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:48.987617', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.0293978750705719, 'timestamp': '2025-09-30 23:00:48.998767', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:49.063194', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.043642040342092514, 'timestamp': '2025-09-30 23:00:49.075243', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:49.153563', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.023425057530403137, 'timestamp': '2025-09-30 23:00:49.170247', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:49.263776', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.046821705996990204, 'timestamp': '2025-09-30 23:00:49.267490', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:49.344398', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.021393287926912308, 'timestamp': '2025-09-30 23:00:49.360191', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:49.437152', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.06703878939151764, 'timestamp': '2025-09-30 23:00:49.449490', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:49.529552', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.049714259803295135, 'timestamp': '2025-09-30 23:00:49.544413', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:49.612421', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.007205241825431585, 'timestamp': '2025-09-30 23:00:49.617153', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:49.702053', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.03658789396286011, 'timestamp': '2025-09-30 23:00:49.706492', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:49.784800', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.09463126212358475, 'timestamp': '2025-09-30 23:00:49.795127', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:49.865443', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.013039781711995602, 'timestamp': '2025-09-30 23:00:49.880309', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:49.957121', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.02903187833726406, 'timestamp': '2025-09-30 23:00:49.963197', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:50.035800', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.008968008682131767, 'timestamp': '2025-09-30 23:00:50.040143', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:50.098408', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.049672726541757584, 'timestamp': '2025-09-30 23:00:50.104927', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.189315', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.02847207896411419, 'timestamp': '2025-09-30 23:00:50.209638', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.288519', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.05579733848571777, 'timestamp': '2025-09-30 23:00:50.300643', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.386742', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.033565424382686615, 'timestamp': '2025-09-30 23:00:50.390535', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.478482', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.09042485803365707, 'timestamp': '2025-09-30 23:00:50.483307', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.566381', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.1084856316447258, 'timestamp': '2025-09-30 23:00:50.573870', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.632780', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.021969081833958626, 'timestamp': '2025-09-30 23:00:50.637212', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.702411', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.03176932781934738, 'timestamp': '2025-09-30 23:00:50.705743', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.773264', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.039941053837537766, 'timestamp': '2025-09-30 23:00:50.777276', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:50.848888', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.03473624959588051, 'timestamp': '2025-09-30 23:00:50.858526', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:00:50.930308', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.031989000737667084, 'timestamp': '2025-09-30 23:00:50.946397', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.024781', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.020351378247141838, 'timestamp': '2025-09-30 23:00:51.029378', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.108031', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.0489177331328392, 'timestamp': '2025-09-30 23:00:51.113561', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.200058', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.035687267780303955, 'timestamp': '2025-09-30 23:00:51.207789', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.295841', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.03462941572070122, 'timestamp': '2025-09-30 23:00:51.308531', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.377890', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.03417656570672989, 'timestamp': '2025-09-30 23:00:51.381392', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:51.472310', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.05209970101714134, 'timestamp': '2025-09-30 23:00:51.477875', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.559912', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.012682460248470306, 'timestamp': '2025-09-30 23:00:51.576655', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:51.670216', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.033751316368579865, 'timestamp': '2025-09-30 23:00:51.675718', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.749030', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.04326338693499565, 'timestamp': '2025-09-30 23:00:51.764551', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:51.835392', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.034662503749132156, 'timestamp': '2025-09-30 23:00:51.851461', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:51.946892', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.03149989992380142, 'timestamp': '2025-09-30 23:00:51.966761', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.050370', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.07099824398756027, 'timestamp': '2025-09-30 23:00:52.055416', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.160953', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.025622209534049034, 'timestamp': '2025-09-30 23:00:52.173023', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.251652', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.02595755271613598, 'timestamp': '2025-09-30 23:00:52.260248', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.330759', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.06664516031742096, 'timestamp': '2025-09-30 23:00:52.344284', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:52.414710', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.022538546472787857, 'timestamp': '2025-09-30 23:00:52.422415', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:52.495950', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.058455951511859894, 'timestamp': '2025-09-30 23:00:52.500593', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.562874', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.03306744992733002, 'timestamp': '2025-09-30 23:00:52.568207', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.630466', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.04472726956009865, 'timestamp': '2025-09-30 23:00:52.640504', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.702230', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.06794262677431107, 'timestamp': '2025-09-30 23:00:52.708351', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.768536', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.042980097234249115, 'timestamp': '2025-09-30 23:00:52.775979', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:52.840070', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.040164314210414886, 'timestamp': '2025-09-30 23:00:52.844230', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:52.926185', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.016769396141171455, 'timestamp': '2025-09-30 23:00:52.942782', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.029688', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.015767255797982216, 'timestamp': '2025-09-30 23:00:53.043100', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.133948', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.05007514730095863, 'timestamp': '2025-09-30 23:00:53.151993', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.248248', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.01128921564668417, 'timestamp': '2025-09-30 23:00:53.254487', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.327737', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.018204916268587112, 'timestamp': '2025-09-30 23:00:53.348328', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.443433', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.05474850907921791, 'timestamp': '2025-09-30 23:00:53.456869', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.550808', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.04729244485497475, 'timestamp': '2025-09-30 23:00:53.555091', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:53.616852', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.03882854804396629, 'timestamp': '2025-09-30 23:00:53.622713', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.703744', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.02964492328464985, 'timestamp': '2025-09-30 23:00:53.711024', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.798162', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.05574679374694824, 'timestamp': '2025-09-30 23:00:53.811443', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.895293', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.03744414448738098, 'timestamp': '2025-09-30 23:00:53.900745', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:53.982606', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.03485554829239845, 'timestamp': '2025-09-30 23:00:53.993870', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.077343', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.025514891371130943, 'timestamp': '2025-09-30 23:00:54.092135', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.170745', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.04255019500851631, 'timestamp': '2025-09-30 23:00:54.174437', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.230836', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.02529151551425457, 'timestamp': '2025-09-30 23:00:54.245568', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.325067', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.05607016012072563, 'timestamp': '2025-09-30 23:00:54.334484', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:54.416535', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.04833246022462845, 'timestamp': '2025-09-30 23:00:54.431024', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.512520', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.04935091361403465, 'timestamp': '2025-09-30 23:00:54.523182', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:54.606350', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.033758874982595444, 'timestamp': '2025-09-30 23:00:54.609225', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.683756', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.02053472213447094, 'timestamp': '2025-09-30 23:00:54.695612', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.776499', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.05356964096426964, 'timestamp': '2025-09-30 23:00:54.783598', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:00:54.875921', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.031138325110077858, 'timestamp': '2025-09-30 23:00:54.887834', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:54.972236', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.03291929513216019, 'timestamp': '2025-09-30 23:00:54.983600', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:55.077058', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.015326551161706448, 'timestamp': '2025-09-30 23:00:55.092480', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:55.174703', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.04833778738975525, 'timestamp': '2025-09-30 23:00:55.192376', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:55.280353', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.03829830884933472, 'timestamp': '2025-09-30 23:00:55.285634', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:55.352802', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.025643138214945793, 'timestamp': '2025-09-30 23:00:55.365815', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:00:55.460776', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.03799784556031227, 'timestamp': '2025-09-30 23:00:55.473818', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:00:55.568130', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.06697523593902588, 'timestamp': '2025-09-30 23:00:55.585620', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:01:01.190320', 'step': 456, 'epoch': 1} {'type': 'pplx', 'content': 5076059.8514167955, 'timestamp': '2025-09-30 23:01:01.204122', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.277270', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.02044263482093811, 'timestamp': '2025-09-30 23:01:01.281593', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.342377', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.028361590579152107, 'timestamp': '2025-09-30 23:01:01.346109', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:01.415768', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.028728734701871872, 'timestamp': '2025-09-30 23:01:01.422261', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.487152', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.04267637059092522, 'timestamp': '2025-09-30 23:01:01.497843', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.566094', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.02915314771234989, 'timestamp': '2025-09-30 23:01:01.569992', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.631636', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.02577696554362774, 'timestamp': '2025-09-30 23:01:01.635449', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.694090', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.03458274528384209, 'timestamp': '2025-09-30 23:01:01.699226', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.759720', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.026424091309309006, 'timestamp': '2025-09-30 23:01:01.769867', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.828965', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.07274480164051056, 'timestamp': '2025-09-30 23:01:01.834781', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.896663', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.03511844947934151, 'timestamp': '2025-09-30 23:01:01.899303', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:01.952825', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.04705628752708435, 'timestamp': '2025-09-30 23:01:01.956659', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:02.013201', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.04946957156062126, 'timestamp': '2025-09-30 23:01:02.020021', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.077189', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.018713489174842834, 'timestamp': '2025-09-30 23:01:02.080754', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.136940', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.019935185089707375, 'timestamp': '2025-09-30 23:01:02.145723', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.204429', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.0408964678645134, 'timestamp': '2025-09-30 23:01:02.208433', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.275693', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.04144855961203575, 'timestamp': '2025-09-30 23:01:02.299083', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.390079', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.01868775486946106, 'timestamp': '2025-09-30 23:01:02.405889', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.502393', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.0710601881146431, 'timestamp': '2025-09-30 23:01:02.520081', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.612967', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.015896247699856758, 'timestamp': '2025-09-30 23:01:02.621982', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:02.691111', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.048924338072538376, 'timestamp': '2025-09-30 23:01:02.701069', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.769296', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.034489359706640244, 'timestamp': '2025-09-30 23:01:02.773224', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:02.834118', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.05319134145975113, 'timestamp': '2025-09-30 23:01:02.837924', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:02.892764', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.039473291486501694, 'timestamp': '2025-09-30 23:01:02.896082', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:02.951922', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.05784866586327553, 'timestamp': '2025-09-30 23:01:02.957429', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:03.010713', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.021013911813497543, 'timestamp': '2025-09-30 23:01:03.024865', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.079888', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.05151502043008804, 'timestamp': '2025-09-30 23:01:03.083090', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.137251', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.024291453883051872, 'timestamp': '2025-09-30 23:01:03.140613', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:03.196194', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.04968034103512764, 'timestamp': '2025-09-30 23:01:03.202042', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:03.255423', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.04158123582601547, 'timestamp': '2025-09-30 23:01:03.261583', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.328642', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.06492061167955399, 'timestamp': '2025-09-30 23:01:03.331200', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.387544', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.032502710819244385, 'timestamp': '2025-09-30 23:01:03.392146', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:03.463589', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.024346616119146347, 'timestamp': '2025-09-30 23:01:03.476532', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.547223', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.0658428966999054, 'timestamp': '2025-09-30 23:01:03.550894', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.608317', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.02477976121008396, 'timestamp': '2025-09-30 23:01:03.614967', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.677746', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.0505065955221653, 'timestamp': '2025-09-30 23:01:03.683149', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:03.748700', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.047060899436473846, 'timestamp': '2025-09-30 23:01:03.755689', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.834998', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.07861166447401047, 'timestamp': '2025-09-30 23:01:03.849716', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.912390', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.051664527505636215, 'timestamp': '2025-09-30 23:01:03.916760', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:03.989366', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.05405418947339058, 'timestamp': '2025-09-30 23:01:04.003735', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:04.104754', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.018131230026483536, 'timestamp': '2025-09-30 23:01:04.116054', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:04.192000', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.03736202046275139, 'timestamp': '2025-09-30 23:01:04.201769', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:04.274115', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.045690786093473434, 'timestamp': '2025-09-30 23:01:04.277599', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:04.339076', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.030156703665852547, 'timestamp': '2025-09-30 23:01:04.346123', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:04.410323', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.01451233122497797, 'timestamp': '2025-09-30 23:01:04.420889', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-30 23:01:04.860709', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:04.922289', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.014496559277176857, 'timestamp': '2025-09-30 23:01:04.929114', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:04.997942', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.013852759264409542, 'timestamp': '2025-09-30 23:01:05.005859', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:05.078341', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.05450353026390076, 'timestamp': '2025-09-30 23:01:05.083207', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:05.146995', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.024278605356812477, 'timestamp': '2025-09-30 23:01:05.153660', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:05.209221', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.02859305962920189, 'timestamp': '2025-09-30 23:01:05.221944', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:05.305873', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.031295716762542725, 'timestamp': '2025-09-30 23:01:05.316799', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:05.386617', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.03868282213807106, 'timestamp': '2025-09-30 23:01:05.395517', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:05.464007', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.05302651599049568, 'timestamp': '2025-09-30 23:01:05.476013', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:05.537683', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.022498423233628273, 'timestamp': '2025-09-30 23:01:05.544177', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:05.610938', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.05017716437578201, 'timestamp': '2025-09-30 23:01:05.613780', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:05.668552', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.04226579889655113, 'timestamp': '2025-09-30 23:01:05.677633', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:05.741056', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.037551041692495346, 'timestamp': '2025-09-30 23:01:05.749460', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:05.814248', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.037024129182100296, 'timestamp': '2025-09-30 23:01:05.817379', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:05.884711', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.039116621017456055, 'timestamp': '2025-09-30 23:01:05.890236', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:05.966397', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.03628412261605263, 'timestamp': '2025-09-30 23:01:05.972966', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.044630', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.05274801328778267, 'timestamp': '2025-09-30 23:01:06.051483', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.119490', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.005865273531526327, 'timestamp': '2025-09-30 23:01:06.121775', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.176013', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.04507339000701904, 'timestamp': '2025-09-30 23:01:06.185548', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.250205', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.03203064575791359, 'timestamp': '2025-09-30 23:01:06.257801', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.319149', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.035769086331129074, 'timestamp': '2025-09-30 23:01:06.325548', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:06.391102', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.021534787490963936, 'timestamp': '2025-09-30 23:01:06.399041', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.474466', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.025235939770936966, 'timestamp': '2025-09-30 23:01:06.484030', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.559968', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.05269134044647217, 'timestamp': '2025-09-30 23:01:06.572086', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.647241', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.02646251954138279, 'timestamp': '2025-09-30 23:01:06.660157', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.736352', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.01974858157336712, 'timestamp': '2025-09-30 23:01:06.741912', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.811382', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.0560244657099247, 'timestamp': '2025-09-30 23:01:06.818487', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.885348', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.032766710966825485, 'timestamp': '2025-09-30 23:01:06.890839', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:06.955194', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.025081980973482132, 'timestamp': '2025-09-30 23:01:06.961724', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.023294', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.0541628822684288, 'timestamp': '2025-09-30 23:01:07.030231', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:07.105848', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.05981593579053879, 'timestamp': '2025-09-30 23:01:07.114346', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:07.185241', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.011520902626216412, 'timestamp': '2025-09-30 23:01:07.194873', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.269444', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.04331650584936142, 'timestamp': '2025-09-30 23:01:07.277242', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:07.353073', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.020397335290908813, 'timestamp': '2025-09-30 23:01:07.362116', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.436983', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.06619611382484436, 'timestamp': '2025-09-30 23:01:07.443285', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.512219', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.03481188043951988, 'timestamp': '2025-09-30 23:01:07.515546', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.581678', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.04723184183239937, 'timestamp': '2025-09-30 23:01:07.588784', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.657599', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.01972922496497631, 'timestamp': '2025-09-30 23:01:07.661830', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.717978', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.034555207937955856, 'timestamp': '2025-09-30 23:01:07.727260', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.790589', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.01806645095348358, 'timestamp': '2025-09-30 23:01:07.799763', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.885365', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.03334914147853851, 'timestamp': '2025-09-30 23:01:07.904703', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:07.981069', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.050369344651699066, 'timestamp': '2025-09-30 23:01:07.997167', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:08.112917', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.08365499973297119, 'timestamp': '2025-09-30 23:01:08.130428', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.231136', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.03486918285489082, 'timestamp': '2025-09-30 23:01:08.251805', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.340586', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.04367095232009888, 'timestamp': '2025-09-30 23:01:08.361827', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.437042', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.05504073575139046, 'timestamp': '2025-09-30 23:01:08.441375', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.500818', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.029755985364317894, 'timestamp': '2025-09-30 23:01:08.505238', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.561758', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.025243042036890984, 'timestamp': '2025-09-30 23:01:08.564422', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.624012', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.0283342357724905, 'timestamp': '2025-09-30 23:01:08.629813', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:08.684265', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.02329109236598015, 'timestamp': '2025-09-30 23:01:08.686425', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.739890', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.03469039127230644, 'timestamp': '2025-09-30 23:01:08.742609', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.795698', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.02212851122021675, 'timestamp': '2025-09-30 23:01:08.800360', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.861352', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.03443489596247673, 'timestamp': '2025-09-30 23:01:08.869367', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:08.945199', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.02295873872935772, 'timestamp': '2025-09-30 23:01:08.951627', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.012466', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.03825094550848007, 'timestamp': '2025-09-30 23:01:09.016201', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.074270', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.03332429751753807, 'timestamp': '2025-09-30 23:01:09.078560', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.137140', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.026689020916819572, 'timestamp': '2025-09-30 23:01:09.144907', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.204156', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.04781361669301987, 'timestamp': '2025-09-30 23:01:09.206632', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.262270', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.030741387978196144, 'timestamp': '2025-09-30 23:01:09.265951', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:09.331540', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.015493580140173435, 'timestamp': '2025-09-30 23:01:09.341754', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:09.423805', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.039514996111392975, 'timestamp': '2025-09-30 23:01:09.430733', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.492363', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.039791740477085114, 'timestamp': '2025-09-30 23:01:09.497723', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.599235', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.04944800212979317, 'timestamp': '2025-09-30 23:01:09.615409', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:09.710766', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.057864390313625336, 'timestamp': '2025-09-30 23:01:09.717885', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:09.793659', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.052753936499357224, 'timestamp': '2025-09-30 23:01:09.806252', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:09.874258', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.03200702369213104, 'timestamp': '2025-09-30 23:01:09.885442', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:09.964727', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.056212835013866425, 'timestamp': '2025-09-30 23:01:09.970705', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.034388', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.031305067241191864, 'timestamp': '2025-09-30 23:01:10.050382', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.137564', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.034125782549381256, 'timestamp': '2025-09-30 23:01:10.154305', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:10.236488', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.019433604553341866, 'timestamp': '2025-09-30 23:01:10.247005', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.331131', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.03794574365019798, 'timestamp': '2025-09-30 23:01:10.334826', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.413625', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.02816013991832733, 'timestamp': '2025-09-30 23:01:10.423154', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.488285', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.036115776747465134, 'timestamp': '2025-09-30 23:01:10.500057', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.575473', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.061875008046627045, 'timestamp': '2025-09-30 23:01:10.582997', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.650686', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.04686696082353592, 'timestamp': '2025-09-30 23:01:10.659648', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.741295', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.01215475331991911, 'timestamp': '2025-09-30 23:01:10.750264', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.829807', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.031227517873048782, 'timestamp': '2025-09-30 23:01:10.839833', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:10.913868', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.031892359256744385, 'timestamp': '2025-09-30 23:01:10.925025', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.006514', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.02821127511560917, 'timestamp': '2025-09-30 23:01:11.019215', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.104069', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.017120327800512314, 'timestamp': '2025-09-30 23:01:11.115390', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.198818', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.05151597782969475, 'timestamp': '2025-09-30 23:01:11.215016', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:11.301519', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.039662618190050125, 'timestamp': '2025-09-30 23:01:11.309217', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.393813', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.04382244870066643, 'timestamp': '2025-09-30 23:01:11.402700', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.484669', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.03443301096558571, 'timestamp': '2025-09-30 23:01:11.495282', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.582519', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.021749304607510567, 'timestamp': '2025-09-30 23:01:11.597865', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:11.669239', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.02451348677277565, 'timestamp': '2025-09-30 23:01:11.680634', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.762540', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.02418426051735878, 'timestamp': '2025-09-30 23:01:11.770561', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.845716', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.04426855966448784, 'timestamp': '2025-09-30 23:01:11.857000', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:11.933620', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.01878538355231285, 'timestamp': '2025-09-30 23:01:11.941328', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:01:12.015058', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.0472663976252079, 'timestamp': '2025-09-30 23:01:12.028468', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.106364', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.026535267010331154, 'timestamp': '2025-09-30 23:01:12.117129', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.189888', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.016201894730329514, 'timestamp': '2025-09-30 23:01:12.196766', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.276479', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.04233170300722122, 'timestamp': '2025-09-30 23:01:12.288913', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.351749', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.025627130642533302, 'timestamp': '2025-09-30 23:01:12.355176', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.416408', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.03188071772456169, 'timestamp': '2025-09-30 23:01:12.423669', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.522687', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.028335098177194595, 'timestamp': '2025-09-30 23:01:12.526449', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:12.604930', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.012501992285251617, 'timestamp': '2025-09-30 23:01:12.616807', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.691479', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.04309256002306938, 'timestamp': '2025-09-30 23:01:12.702293', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:12.770647', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.016380390152335167, 'timestamp': '2025-09-30 23:01:12.780206', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.849872', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.01512268465012312, 'timestamp': '2025-09-30 23:01:12.859586', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:12.934671', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.04646097496151924, 'timestamp': '2025-09-30 23:01:12.946813', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:13.018127', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.04619088023900986, 'timestamp': '2025-09-30 23:01:13.035837', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:13.109677', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.04907592386007309, 'timestamp': '2025-09-30 23:01:13.119854', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:13.194311', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.005457249004393816, 'timestamp': '2025-09-30 23:01:13.197483', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:13.268806', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.01744483970105648, 'timestamp': '2025-09-30 23:01:13.281416', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:13.345902', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.03033006750047207, 'timestamp': '2025-09-30 23:01:13.352334', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:13.428177', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.0368046760559082, 'timestamp': '2025-09-30 23:01:13.433064', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:13.516294', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.006652175914496183, 'timestamp': '2025-09-30 23:01:13.527354', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:13.601081', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.0496525838971138, 'timestamp': '2025-09-30 23:01:13.618062', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:01:19.679982', 'step': 608, 'epoch': 1} {'type': 'pplx', 'content': 5547306.847342785, 'timestamp': '2025-09-30 23:01:19.694031', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:19.762206', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.07541151344776154, 'timestamp': '2025-09-30 23:01:19.770751', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:19.834628', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.02806697227060795, 'timestamp': '2025-09-30 23:01:19.852661', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:19.936094', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.01629084162414074, 'timestamp': '2025-09-30 23:01:19.941592', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.013701', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.010946636088192463, 'timestamp': '2025-09-30 23:01:20.026353', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.106603', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.040636125952005386, 'timestamp': '2025-09-30 23:01:20.112027', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.190499', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.047663770616054535, 'timestamp': '2025-09-30 23:01:20.193870', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.256832', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.006408171262592077, 'timestamp': '2025-09-30 23:01:20.266388', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.329005', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.005471521057188511, 'timestamp': '2025-09-30 23:01:20.336150', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.400220', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.05962811037898064, 'timestamp': '2025-09-30 23:01:20.404738', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.460430', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.037103865295648575, 'timestamp': '2025-09-30 23:01:20.464158', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.524086', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.024409620091319084, 'timestamp': '2025-09-30 23:01:20.528227', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.587872', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.0143808638677001, 'timestamp': '2025-09-30 23:01:20.600457', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:20.666019', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.008895891718566418, 'timestamp': '2025-09-30 23:01:20.673365', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:20.747950', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.02897481620311737, 'timestamp': '2025-09-30 23:01:20.753176', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:20.820914', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.04886695742607117, 'timestamp': '2025-09-30 23:01:20.825869', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:20.905052', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.07326585054397583, 'timestamp': '2025-09-30 23:01:20.920516', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.012062', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.07064451277256012, 'timestamp': '2025-09-30 23:01:21.021002', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.106028', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.04355839639902115, 'timestamp': '2025-09-30 23:01:21.116744', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.199416', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.026019016280770302, 'timestamp': '2025-09-30 23:01:21.207051', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.285061', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.02496199496090412, 'timestamp': '2025-09-30 23:01:21.300186', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.368878', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.035170771181583405, 'timestamp': '2025-09-30 23:01:21.377718', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.459396', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.07201860100030899, 'timestamp': '2025-09-30 23:01:21.471726', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.543405', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.03778615966439247, 'timestamp': '2025-09-30 23:01:21.552873', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.626602', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.05438634753227234, 'timestamp': '2025-09-30 23:01:21.633498', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.700485', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.006034324411302805, 'timestamp': '2025-09-30 23:01:21.708928', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.778003', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.04807204380631447, 'timestamp': '2025-09-30 23:01:21.789593', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.862016', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.033254992216825485, 'timestamp': '2025-09-30 23:01:21.864859', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:21.944385', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.02719157747924328, 'timestamp': '2025-09-30 23:01:21.950896', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.018751', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.00784881878644228, 'timestamp': '2025-09-30 23:01:22.027427', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.108394', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.04540562257170677, 'timestamp': '2025-09-30 23:01:22.115875', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:22.182466', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.025386694818735123, 'timestamp': '2025-09-30 23:01:22.191513', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.254061', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.03658396005630493, 'timestamp': '2025-09-30 23:01:22.262017', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.338887', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.02811594121158123, 'timestamp': '2025-09-30 23:01:22.362934', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.443894', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.029989276081323624, 'timestamp': '2025-09-30 23:01:22.447533', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.522205', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.04389957711100578, 'timestamp': '2025-09-30 23:01:22.533839', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.602229', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.05785181745886803, 'timestamp': '2025-09-30 23:01:22.617751', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.693063', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.054299868643283844, 'timestamp': '2025-09-30 23:01:22.701595', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:22.771576', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.031794555485248566, 'timestamp': '2025-09-30 23:01:22.783352', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.860903', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.021228130906820297, 'timestamp': '2025-09-30 23:01:22.870286', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:22.947445', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.04349517822265625, 'timestamp': '2025-09-30 23:01:22.958125', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:23.032097', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.030997663736343384, 'timestamp': '2025-09-30 23:01:23.039196', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.116903', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.03497500345110893, 'timestamp': '2025-09-30 23:01:23.122059', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.193647', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.017582865431904793, 'timestamp': '2025-09-30 23:01:23.202432', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:23.269654', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.029038412496447563, 'timestamp': '2025-09-30 23:01:23.278444', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.349225', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.017728062346577644, 'timestamp': '2025-09-30 23:01:23.356928', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.418091', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.02866002358496189, 'timestamp': '2025-09-30 23:01:23.429579', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.504579', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.034623317420482635, 'timestamp': '2025-09-30 23:01:23.515857', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.590064', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.0490330345928669, 'timestamp': '2025-09-30 23:01:23.597157', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.665960', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.03360982611775398, 'timestamp': '2025-09-30 23:01:23.677278', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:23.755810', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.04615866392850876, 'timestamp': '2025-09-30 23:01:23.766106', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.844336', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.025838572531938553, 'timestamp': '2025-09-30 23:01:23.847261', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:23.918735', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.025707701221108437, 'timestamp': '2025-09-30 23:01:23.927896', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.017713', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.05296885967254639, 'timestamp': '2025-09-30 23:01:24.028771', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.106764', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.026485715061426163, 'timestamp': '2025-09-30 23:01:24.116129', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.190666', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.058072399348020554, 'timestamp': '2025-09-30 23:01:24.198736', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.266322', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.015042999759316444, 'timestamp': '2025-09-30 23:01:24.273549', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.353698', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.013282723724842072, 'timestamp': '2025-09-30 23:01:24.364387', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:24.441778', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.010380762629210949, 'timestamp': '2025-09-30 23:01:24.452011', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.529560', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.04492660611867905, 'timestamp': '2025-09-30 23:01:24.541151', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:24.620693', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.027886230498552322, 'timestamp': '2025-09-30 23:01:24.636469', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.712336', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.037733014672994614, 'timestamp': '2025-09-30 23:01:24.721500', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.790053', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.018635591492056847, 'timestamp': '2025-09-30 23:01:24.794703', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.865771', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.045321010053157806, 'timestamp': '2025-09-30 23:01:24.875758', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:24.949091', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.011379798874258995, 'timestamp': '2025-09-30 23:01:24.962916', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.048961', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.03151679411530495, 'timestamp': '2025-09-30 23:01:25.054215', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.126875', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.0398111455142498, 'timestamp': '2025-09-30 23:01:25.131272', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.193265', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.010264841839671135, 'timestamp': '2025-09-30 23:01:25.202744', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:25.271624', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.031494997441768646, 'timestamp': '2025-09-30 23:01:25.286582', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.367059', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.05119546502828598, 'timestamp': '2025-09-30 23:01:25.379101', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.452967', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.04522442817687988, 'timestamp': '2025-09-30 23:01:25.456021', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.539050', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.036947499960660934, 'timestamp': '2025-09-30 23:01:25.552030', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.625755', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.019582627341151237, 'timestamp': '2025-09-30 23:01:25.639954', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.705616', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.01194621343165636, 'timestamp': '2025-09-30 23:01:25.709763', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.785517', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.027539638802409172, 'timestamp': '2025-09-30 23:01:25.789027', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.862791', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.013852422125637531, 'timestamp': '2025-09-30 23:01:25.867390', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:25.926317', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.01818131096661091, 'timestamp': '2025-09-30 23:01:25.936153', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.011342', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.007679333444684744, 'timestamp': '2025-09-30 23:01:26.022655', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.096075', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.032011281698942184, 'timestamp': '2025-09-30 23:01:26.104556', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.177654', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.03537698835134506, 'timestamp': '2025-09-30 23:01:26.181497', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.249570', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.01764621213078499, 'timestamp': '2025-09-30 23:01:26.256798', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.334431', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.029831720516085625, 'timestamp': '2025-09-30 23:01:26.346072', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.418486', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.025525489822030067, 'timestamp': '2025-09-30 23:01:26.429828', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.516632', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.054510705173015594, 'timestamp': '2025-09-30 23:01:26.524766', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.604375', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.09279637038707733, 'timestamp': '2025-09-30 23:01:26.620573', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.703483', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.034408409148454666, 'timestamp': '2025-09-30 23:01:26.712623', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:26.791492', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.017123540863394737, 'timestamp': '2025-09-30 23:01:26.798297', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.859681', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.015895919874310493, 'timestamp': '2025-09-30 23:01:26.871298', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:26.944134', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.0278030913323164, 'timestamp': '2025-09-30 23:01:26.952038', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.026597', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.03370068222284317, 'timestamp': '2025-09-30 23:01:27.030860', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.105103', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.03149133548140526, 'timestamp': '2025-09-30 23:01:27.114949', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.190842', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.03165972977876663, 'timestamp': '2025-09-30 23:01:27.195533', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.259271', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.01321608480066061, 'timestamp': '2025-09-30 23:01:27.271478', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.332461', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.015984507277607918, 'timestamp': '2025-09-30 23:01:27.341095', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:27.414709', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.04435492306947708, 'timestamp': '2025-09-30 23:01:27.431371', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:27.511068', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.019490819424390793, 'timestamp': '2025-09-30 23:01:27.522215', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:27.607070', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.033825941383838654, 'timestamp': '2025-09-30 23:01:27.626823', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.729401', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.0038150448817759752, 'timestamp': '2025-09-30 23:01:27.747885', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.844896', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.039350688457489014, 'timestamp': '2025-09-30 23:01:27.850720', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:27.942019', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.04444491118192673, 'timestamp': '2025-09-30 23:01:27.948826', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.022113', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.018028000369668007, 'timestamp': '2025-09-30 23:01:28.042066', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:28.137995', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.07023204118013382, 'timestamp': '2025-09-30 23:01:28.142296', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.226434', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.02322898432612419, 'timestamp': '2025-09-30 23:01:28.233302', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.312925', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.04197729006409645, 'timestamp': '2025-09-30 23:01:28.321256', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.389225', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.028956213966012, 'timestamp': '2025-09-30 23:01:28.400596', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.461681', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.08947447687387466, 'timestamp': '2025-09-30 23:01:28.466438', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.528141', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.018689075484871864, 'timestamp': '2025-09-30 23:01:28.532551', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:28.591436', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.01669180765748024, 'timestamp': '2025-09-30 23:01:28.596200', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.656239', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.014352160505950451, 'timestamp': '2025-09-30 23:01:28.663717', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.725311', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.029540663585066795, 'timestamp': '2025-09-30 23:01:28.729933', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.789899', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.029578935354948044, 'timestamp': '2025-09-30 23:01:28.795789', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:28.856020', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.01435443852096796, 'timestamp': '2025-09-30 23:01:28.860956', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:28.921702', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.020745327696204185, 'timestamp': '2025-09-30 23:01:28.929625', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:28.986003', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.04168462008237839, 'timestamp': '2025-09-30 23:01:28.994007', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.060527', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.04045948386192322, 'timestamp': '2025-09-30 23:01:29.065149', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:29.130245', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.038917507976293564, 'timestamp': '2025-09-30 23:01:29.137044', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.205305', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.006401721388101578, 'timestamp': '2025-09-30 23:01:29.217726', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.293879', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.03547866642475128, 'timestamp': '2025-09-30 23:01:29.304835', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.387154', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.023773137480020523, 'timestamp': '2025-09-30 23:01:29.398413', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.475152', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.04711119458079338, 'timestamp': '2025-09-30 23:01:29.487723', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.549704', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.04841955378651619, 'timestamp': '2025-09-30 23:01:29.559888', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.645131', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.04906556010246277, 'timestamp': '2025-09-30 23:01:29.649664', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.729120', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.02961215004324913, 'timestamp': '2025-09-30 23:01:29.743723', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.827171', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.037297967821359634, 'timestamp': '2025-09-30 23:01:29.839028', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:29.925304', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.03560144826769829, 'timestamp': '2025-09-30 23:01:29.941759', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.017503', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.054293952882289886, 'timestamp': '2025-09-30 23:01:30.023763', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.106287', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.015502073802053928, 'timestamp': '2025-09-30 23:01:30.116497', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.200633', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.04862033948302269, 'timestamp': '2025-09-30 23:01:30.210738', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.281483', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.04229152575135231, 'timestamp': '2025-09-30 23:01:30.289163', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.359097', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.01822049915790558, 'timestamp': '2025-09-30 23:01:30.370040', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:30.433442', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.018190398812294006, 'timestamp': '2025-09-30 23:01:30.442188', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.519302', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.03337325155735016, 'timestamp': '2025-09-30 23:01:30.529949', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.601558', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.021087685599923134, 'timestamp': '2025-09-30 23:01:30.613747', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:30.687323', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.03710019960999489, 'timestamp': '2025-09-30 23:01:30.691197', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.754421', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.07074956595897675, 'timestamp': '2025-09-30 23:01:30.762788', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.840843', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.004598034080117941, 'timestamp': '2025-09-30 23:01:30.851749', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:30.923985', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.027880465611815453, 'timestamp': '2025-09-30 23:01:30.932529', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.000807', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.06492619216442108, 'timestamp': '2025-09-30 23:01:31.010140', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.082682', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.009510104544460773, 'timestamp': '2025-09-30 23:01:31.091179', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.166007', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.027069492265582085, 'timestamp': '2025-09-30 23:01:31.174963', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.254893', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.0561070553958416, 'timestamp': '2025-09-30 23:01:31.261854', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.335101', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.02682889625430107, 'timestamp': '2025-09-30 23:01:31.344486', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:31.425387', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.04579097405076027, 'timestamp': '2025-09-30 23:01:31.434693', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.513122', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.04542069509625435, 'timestamp': '2025-09-30 23:01:31.523989', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.586798', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.018707260489463806, 'timestamp': '2025-09-30 23:01:31.593737', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.669476', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.03743531554937363, 'timestamp': '2025-09-30 23:01:31.678146', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.753952', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.034936271607875824, 'timestamp': '2025-09-30 23:01:31.757146', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.827975', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.04252883419394493, 'timestamp': '2025-09-30 23:01:31.836341', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.912936', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.02145133540034294, 'timestamp': '2025-09-30 23:01:31.921307', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:31.988037', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.025634780526161194, 'timestamp': '2025-09-30 23:01:32.002427', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:32.076376', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.047392427921295166, 'timestamp': '2025-09-30 23:01:32.080433', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:32.144334', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.008075188845396042, 'timestamp': '2025-09-30 23:01:32.151893', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:32.228024', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.053990695625543594, 'timestamp': '2025-09-30 23:01:32.239616', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:01:36.679202', 'step': 760, 'epoch': 1} {'type': 'pplx', 'content': 5477005.007054888, 'timestamp': '2025-09-30 23:01:36.691692', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:36.748515', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.02243911847472191, 'timestamp': '2025-09-30 23:01:36.750485', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:36.809302', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.030586842447519302, 'timestamp': '2025-09-30 23:01:36.812058', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:36.874261', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.04351625218987465, 'timestamp': '2025-09-30 23:01:36.878183', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:36.931822', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.07079356163740158, 'timestamp': '2025-09-30 23:01:36.939766', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:36.993718', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.02976100519299507, 'timestamp': '2025-09-30 23:01:36.996636', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.055032', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.037381596863269806, 'timestamp': '2025-09-30 23:01:37.058919', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.121520', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.034120429307222366, 'timestamp': '2025-09-30 23:01:37.131539', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.190733', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.02589803747832775, 'timestamp': '2025-09-30 23:01:37.196346', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.247932', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.01118621975183487, 'timestamp': '2025-09-30 23:01:37.253974', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.309830', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.03211459144949913, 'timestamp': '2025-09-30 23:01:37.318007', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.378029', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.012033934704959393, 'timestamp': '2025-09-30 23:01:37.380490', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.438255', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.04008110240101814, 'timestamp': '2025-09-30 23:01:37.460147', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.527060', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.026277853175997734, 'timestamp': '2025-09-30 23:01:37.533221', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.585664', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.032803285866975784, 'timestamp': '2025-09-30 23:01:37.587892', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.658083', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.022924313321709633, 'timestamp': '2025-09-30 23:01:37.666432', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.722249', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.00961998850107193, 'timestamp': '2025-09-30 23:01:37.728627', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.784427', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.016324248164892197, 'timestamp': '2025-09-30 23:01:37.787433', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:37.851223', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.021985555067658424, 'timestamp': '2025-09-30 23:01:37.854054', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.913087', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.07625267654657364, 'timestamp': '2025-09-30 23:01:37.923168', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:37.977617', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.01586642675101757, 'timestamp': '2025-09-30 23:01:37.985041', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.050887', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.03927386924624443, 'timestamp': '2025-09-30 23:01:38.061211', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:38.123504', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.03937705606222153, 'timestamp': '2025-09-30 23:01:38.131882', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.203637', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.03616476058959961, 'timestamp': '2025-09-30 23:01:38.207567', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.270446', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.016100743785500526, 'timestamp': '2025-09-30 23:01:38.278132', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.343401', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.02286832220852375, 'timestamp': '2025-09-30 23:01:38.346694', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.446162', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.0310807041823864, 'timestamp': '2025-09-30 23:01:38.459411', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.515226', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.040844161063432693, 'timestamp': '2025-09-30 23:01:38.517418', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.572531', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.035684484988451004, 'timestamp': '2025-09-30 23:01:38.585872', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.642177', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.023812713101506233, 'timestamp': '2025-09-30 23:01:38.657917', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.725182', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.016570908948779106, 'timestamp': '2025-09-30 23:01:38.736676', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:38.799302', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.055326174944639206, 'timestamp': '2025-09-30 23:01:38.802849', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:38.858601', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.04794105514883995, 'timestamp': '2025-09-30 23:01:38.867043', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:38.939828', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.040087588131427765, 'timestamp': '2025-09-30 23:01:38.942356', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.002778', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.052113126963377, 'timestamp': '2025-09-30 23:01:39.016088', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.084416', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.008430135436356068, 'timestamp': '2025-09-30 23:01:39.088954', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.149963', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.009721459820866585, 'timestamp': '2025-09-30 23:01:39.161107', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:39.215885', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.026752332225441933, 'timestamp': '2025-09-30 23:01:39.223595', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.293807', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.04208645597100258, 'timestamp': '2025-09-30 23:01:39.297900', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:39.354989', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.02110152132809162, 'timestamp': '2025-09-30 23:01:39.365143', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.427809', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.008370422758162022, 'timestamp': '2025-09-30 23:01:39.434366', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.508311', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.0043225898407399654, 'timestamp': '2025-09-30 23:01:39.511430', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.576269', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.004933378659188747, 'timestamp': '2025-09-30 23:01:39.581538', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:39.645814', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.03848690912127495, 'timestamp': '2025-09-30 23:01:39.653079', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.716051', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.047671444714069366, 'timestamp': '2025-09-30 23:01:39.724099', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.796065', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.014988332986831665, 'timestamp': '2025-09-30 23:01:39.800450', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:39.854815', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.033238060772418976, 'timestamp': '2025-09-30 23:01:39.859979', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.919406', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.017044920474290848, 'timestamp': '2025-09-30 23:01:39.925100', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:39.986513', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.07159120589494705, 'timestamp': '2025-09-30 23:01:39.995003', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.056603', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.02976738102734089, 'timestamp': '2025-09-30 23:01:40.061480', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.121502', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.008462072350084782, 'timestamp': '2025-09-30 23:01:40.124487', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.180288', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.02576334774494171, 'timestamp': '2025-09-30 23:01:40.183197', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.237907', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.021414505317807198, 'timestamp': '2025-09-30 23:01:40.244779', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.305007', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.012908092699944973, 'timestamp': '2025-09-30 23:01:40.307584', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.361268', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.018744586035609245, 'timestamp': '2025-09-30 23:01:40.364144', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.418736', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.00485923932865262, 'timestamp': '2025-09-30 23:01:40.421024', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:40.474889', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.03186254948377609, 'timestamp': '2025-09-30 23:01:40.481128', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.533931', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.057132039219141006, 'timestamp': '2025-09-30 23:01:40.536219', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.589052', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.012574831023812294, 'timestamp': '2025-09-30 23:01:40.594167', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.647733', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.013155775144696236, 'timestamp': '2025-09-30 23:01:40.650283', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.704981', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.0136887077242136, 'timestamp': '2025-09-30 23:01:40.711296', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.771640', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.03719786927103996, 'timestamp': '2025-09-30 23:01:40.775600', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:40.830888', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.03987222909927368, 'timestamp': '2025-09-30 23:01:40.834826', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.888245', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.0023163706064224243, 'timestamp': '2025-09-30 23:01:40.891623', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:40.958413', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.03673391789197922, 'timestamp': '2025-09-30 23:01:40.970636', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.022393', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.037853918969631195, 'timestamp': '2025-09-30 23:01:41.025142', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:41.081661', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.0360364206135273, 'timestamp': '2025-09-30 23:01:41.086423', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.149038', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.01686854287981987, 'timestamp': '2025-09-30 23:01:41.152241', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:41.214803', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.008635458536446095, 'timestamp': '2025-09-30 23:01:41.224952', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:41.280258', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.05941464379429817, 'timestamp': '2025-09-30 23:01:41.286218', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.347297', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.06866133958101273, 'timestamp': '2025-09-30 23:01:41.350244', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.405001', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.055822890251874924, 'timestamp': '2025-09-30 23:01:41.407921', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:41.461679', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.02478504553437233, 'timestamp': '2025-09-30 23:01:41.467931', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.523285', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.06630142778158188, 'timestamp': '2025-09-30 23:01:41.525551', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.578658', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.024110285565257072, 'timestamp': '2025-09-30 23:01:41.582965', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.640819', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.042751967906951904, 'timestamp': '2025-09-30 23:01:41.643693', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.700585', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.055142100900411606, 'timestamp': '2025-09-30 23:01:41.706960', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.760820', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.0470021553337574, 'timestamp': '2025-09-30 23:01:41.763996', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.823346', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.017830369994044304, 'timestamp': '2025-09-30 23:01:41.826778', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:41.880903', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.08437288552522659, 'timestamp': '2025-09-30 23:01:41.883097', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:41.937949', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.010386079549789429, 'timestamp': '2025-09-30 23:01:41.945034', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.007427', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.026947038248181343, 'timestamp': '2025-09-30 23:01:42.010014', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.064898', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.0027078010607510805, 'timestamp': '2025-09-30 23:01:42.068027', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.122206', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.006933902390301228, 'timestamp': '2025-09-30 23:01:42.124957', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.178084', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.042960163205862045, 'timestamp': '2025-09-30 23:01:42.185109', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:01:42.238097', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.012449991889297962, 'timestamp': '2025-09-30 23:01:42.244321', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.308419', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.009524223394691944, 'timestamp': '2025-09-30 23:01:42.316159', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.373675', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.028616994619369507, 'timestamp': '2025-09-30 23:01:42.376634', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.439379', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.009903250262141228, 'timestamp': '2025-09-30 23:01:42.446130', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.501629', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.012156076729297638, 'timestamp': '2025-09-30 23:01:42.504444', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.559126', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.03881649300456047, 'timestamp': '2025-09-30 23:01:42.562840', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.623268', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.004314872901886702, 'timestamp': '2025-09-30 23:01:42.626611', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.681301', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.0272415392100811, 'timestamp': '2025-09-30 23:01:42.690586', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:42.753646', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.04589690640568733, 'timestamp': '2025-09-30 23:01:42.758517', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.828764', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.028447898104786873, 'timestamp': '2025-09-30 23:01:42.832303', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.890577', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.03048432059586048, 'timestamp': '2025-09-30 23:01:42.896381', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:42.957808', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.012247803620994091, 'timestamp': '2025-09-30 23:01:42.966101', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:43.024768', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.017625847831368446, 'timestamp': '2025-09-30 23:01:43.030373', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.086976', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.06193142756819725, 'timestamp': '2025-09-30 23:01:43.092093', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.156759', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.049581609666347504, 'timestamp': '2025-09-30 23:01:43.160110', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:43.217954', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.08599596470594406, 'timestamp': '2025-09-30 23:01:43.225565', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.283000', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.04211919754743576, 'timestamp': '2025-09-30 23:01:43.287206', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.348987', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.017908509820699692, 'timestamp': '2025-09-30 23:01:43.355061', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:43.420864', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.06517205387353897, 'timestamp': '2025-09-30 23:01:43.430021', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.496028', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.02844844199717045, 'timestamp': '2025-09-30 23:01:43.509117', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.580859', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.05836174264550209, 'timestamp': '2025-09-30 23:01:43.586314', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.642856', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.0328885093331337, 'timestamp': '2025-09-30 23:01:43.645996', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.712193', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.03841327503323555, 'timestamp': '2025-09-30 23:01:43.715732', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.776793', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.03795378655195236, 'timestamp': '2025-09-30 23:01:43.786749', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.852110', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.054861150681972504, 'timestamp': '2025-09-30 23:01:43.855306', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.909008', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.04028218612074852, 'timestamp': '2025-09-30 23:01:43.913127', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:43.974874', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.020712658762931824, 'timestamp': '2025-09-30 23:01:43.979182', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.037347', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.01940157450735569, 'timestamp': '2025-09-30 23:01:44.045963', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:44.107797', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.060273654758930206, 'timestamp': '2025-09-30 23:01:44.110625', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.169668', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.025629861280322075, 'timestamp': '2025-09-30 23:01:44.174266', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.232325', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.033108409494161606, 'timestamp': '2025-09-30 23:01:44.235016', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.295310', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.039151597768068314, 'timestamp': '2025-09-30 23:01:44.301733', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:44.357796', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.04283430799841881, 'timestamp': '2025-09-30 23:01:44.360255', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.419545', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.030710693448781967, 'timestamp': '2025-09-30 23:01:44.423533', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.478372', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.024485517293214798, 'timestamp': '2025-09-30 23:01:44.485921', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.541689', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.0364973358809948, 'timestamp': '2025-09-30 23:01:44.547996', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.602735', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.030123425647616386, 'timestamp': '2025-09-30 23:01:44.606130', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.669345', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.03594103083014488, 'timestamp': '2025-09-30 23:01:44.675487', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.738717', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.03687836974859238, 'timestamp': '2025-09-30 23:01:44.743475', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.820208', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.01788102462887764, 'timestamp': '2025-09-30 23:01:44.832759', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.888467', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.02062665857374668, 'timestamp': '2025-09-30 23:01:44.895296', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:44.963159', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.0231231190264225, 'timestamp': '2025-09-30 23:01:44.965873', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.025712', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.03804356977343559, 'timestamp': '2025-09-30 23:01:45.028268', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.083435', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.052623968571424484, 'timestamp': '2025-09-30 23:01:45.089717', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.143232', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.047556180506944656, 'timestamp': '2025-09-30 23:01:45.145589', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.197836', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.01451077125966549, 'timestamp': '2025-09-30 23:01:45.200274', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 23:01:45.280053', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.05255204811692238, 'timestamp': '2025-09-30 23:01:45.282212', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.334963', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.01087985746562481, 'timestamp': '2025-09-30 23:01:45.346410', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.400005', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.024471750482916832, 'timestamp': '2025-09-30 23:01:45.404750', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.464829', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.04286841303110123, 'timestamp': '2025-09-30 23:01:45.469711', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.532058', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.054453544318675995, 'timestamp': '2025-09-30 23:01:45.535122', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.592326', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.04351013898849487, 'timestamp': '2025-09-30 23:01:45.602126', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.668496', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.05330852046608925, 'timestamp': '2025-09-30 23:01:45.674468', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:45.728486', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.05772271007299423, 'timestamp': '2025-09-30 23:01:45.731744', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.794820', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.04748755320906639, 'timestamp': '2025-09-30 23:01:45.801152', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.862821', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.03401828929781914, 'timestamp': '2025-09-30 23:01:45.869394', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:45.931001', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.012792103923857212, 'timestamp': '2025-09-30 23:01:45.940408', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.001625', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.0328175313770771, 'timestamp': '2025-09-30 23:01:46.004597', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.060748', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.02408072166144848, 'timestamp': '2025-09-30 23:01:46.063000', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.126001', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.03160642087459564, 'timestamp': '2025-09-30 23:01:46.133703', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.189582', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.015782320871949196, 'timestamp': '2025-09-30 23:01:46.191988', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.244905', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.029420912265777588, 'timestamp': '2025-09-30 23:01:46.247193', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.300437', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.0537630096077919, 'timestamp': '2025-09-30 23:01:46.302730', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.355004', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.012278038077056408, 'timestamp': '2025-09-30 23:01:46.361160', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.417548', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.05677291750907898, 'timestamp': '2025-09-30 23:01:46.424719', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.477346', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.029239853844046593, 'timestamp': '2025-09-30 23:01:46.484403', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:46.557731', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.016625234857201576, 'timestamp': '2025-09-30 23:01:46.563437', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:46.657579', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.015671832486987114, 'timestamp': '2025-09-30 23:01:46.679502', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:01:51.684146', 'step': 912, 'epoch': 1} {'type': 'pplx', 'content': 4796488.317706935, 'timestamp': '2025-09-30 23:01:51.688401', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:51.747574', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.051528628915548325, 'timestamp': '2025-09-30 23:01:51.756523', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:51.830748', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.026990437880158424, 'timestamp': '2025-09-30 23:01:51.843644', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:51.917778', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.031246110796928406, 'timestamp': '2025-09-30 23:01:51.925935', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:51.999607', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.028680631890892982, 'timestamp': '2025-09-30 23:01:52.008012', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.074140', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.012956093065440655, 'timestamp': '2025-09-30 23:01:52.084196', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.156305', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.026706237345933914, 'timestamp': '2025-09-30 23:01:52.162340', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.244137', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.016436712816357613, 'timestamp': '2025-09-30 23:01:52.252772', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.331494', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.041679445654153824, 'timestamp': '2025-09-30 23:01:52.342196', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.418213', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.049134593456983566, 'timestamp': '2025-09-30 23:01:52.427262', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.506146', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.017595872282981873, 'timestamp': '2025-09-30 23:01:52.514350', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.577255', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.03826640173792839, 'timestamp': '2025-09-30 23:01:52.580819', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:52.652916', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.023380981758236885, 'timestamp': '2025-09-30 23:01:52.664613', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:52.725690', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.03149070590734482, 'timestamp': '2025-09-30 23:01:52.729968', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.810013', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.014512987807393074, 'timestamp': '2025-09-30 23:01:52.815401', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.878016', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.041023045778274536, 'timestamp': '2025-09-30 23:01:52.881941', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:52.951290', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.04123148322105408, 'timestamp': '2025-09-30 23:01:52.966054', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.046495', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.03693842515349388, 'timestamp': '2025-09-30 23:01:53.061714', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.121338', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.07058300077915192, 'timestamp': '2025-09-30 23:01:53.124772', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.204719', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.013540773652493954, 'timestamp': '2025-09-30 23:01:53.213990', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.290846', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.06485407054424286, 'timestamp': '2025-09-30 23:01:53.297875', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.376141', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.023985369130969048, 'timestamp': '2025-09-30 23:01:53.385511', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.454661', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.02786863222718239, 'timestamp': '2025-09-30 23:01:53.458648', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.524221', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.03358496353030205, 'timestamp': '2025-09-30 23:01:53.534473', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.604842', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.046580616384744644, 'timestamp': '2025-09-30 23:01:53.616146', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.672929', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.04051894322037697, 'timestamp': '2025-09-30 23:01:53.681625', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.749141', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.05072256922721863, 'timestamp': '2025-09-30 23:01:53.752947', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.807056', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.03205116465687752, 'timestamp': '2025-09-30 23:01:53.823198', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:53.917000', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.029656371101737022, 'timestamp': '2025-09-30 23:01:53.935497', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.015574', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.026030199602246284, 'timestamp': '2025-09-30 23:01:54.027150', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.093635', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.020986279472708702, 'timestamp': '2025-09-30 23:01:54.102980', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.174800', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.06209013983607292, 'timestamp': '2025-09-30 23:01:54.186276', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.254592', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.018431976437568665, 'timestamp': '2025-09-30 23:01:54.267405', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.341563', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.015084302984178066, 'timestamp': '2025-09-30 23:01:54.350615', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:54.422311', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.02040293999016285, 'timestamp': '2025-09-30 23:01:54.436137', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:54.501506', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.03218474239110947, 'timestamp': '2025-09-30 23:01:54.508440', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:54.582031', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.04374002665281296, 'timestamp': '2025-09-30 23:01:54.589529', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:54.656714', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.03554220125079155, 'timestamp': '2025-09-30 23:01:54.666726', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.736987', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.031255099922418594, 'timestamp': '2025-09-30 23:01:54.741192', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.813950', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.03591073676943779, 'timestamp': '2025-09-30 23:01:54.826360', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:54.899745', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.054497022181749344, 'timestamp': '2025-09-30 23:01:54.907500', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:54.963229', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.014292588457465172, 'timestamp': '2025-09-30 23:01:54.968617', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.030081', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.006809546146541834, 'timestamp': '2025-09-30 23:01:55.034159', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.104407', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.034312210977077484, 'timestamp': '2025-09-30 23:01:55.112141', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.175018', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.028233494609594345, 'timestamp': '2025-09-30 23:01:55.182766', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.254339', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.04965582489967346, 'timestamp': '2025-09-30 23:01:55.257672', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.335812', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.03330351412296295, 'timestamp': '2025-09-30 23:01:55.339580', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.405377', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.02243879809975624, 'timestamp': '2025-09-30 23:01:55.418361', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.492340', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.011996844783425331, 'timestamp': '2025-09-30 23:01:55.502090', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:01:55.578365', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.022760218009352684, 'timestamp': '2025-09-30 23:01:55.588903', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.665738', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.04816261678934097, 'timestamp': '2025-09-30 23:01:55.678976', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.759611', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.0197751522064209, 'timestamp': '2025-09-30 23:01:55.770765', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.828503', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.032824743539094925, 'timestamp': '2025-09-30 23:01:55.835168', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.906808', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.05236168950796127, 'timestamp': '2025-09-30 23:01:55.911580', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:55.979768', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.02323557808995247, 'timestamp': '2025-09-30 23:01:55.983708', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:56.061846', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.047923047095537186, 'timestamp': '2025-09-30 23:01:56.076847', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.155691', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.016720568761229515, 'timestamp': '2025-09-30 23:01:56.162811', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.222148', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.04281138256192207, 'timestamp': '2025-09-30 23:01:56.234430', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.312350', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.0070656463503837585, 'timestamp': '2025-09-30 23:01:56.325824', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.397999', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.022280290722846985, 'timestamp': '2025-09-30 23:01:56.410772', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:56.492335', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.014066369272768497, 'timestamp': '2025-09-30 23:01:56.506082', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.586144', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.10097097605466843, 'timestamp': '2025-09-30 23:01:56.594031', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.659314', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.04001113027334213, 'timestamp': '2025-09-30 23:01:56.666325', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.735081', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.011762057431042194, 'timestamp': '2025-09-30 23:01:56.746136', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.803825', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.07961393147706985, 'timestamp': '2025-09-30 23:01:56.812437', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.886009', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.04812028631567955, 'timestamp': '2025-09-30 23:01:56.891720', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:56.958571', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.05164288729429245, 'timestamp': '2025-09-30 23:01:56.967224', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:57.030804', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.018440408632159233, 'timestamp': '2025-09-30 23:01:57.049381', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.115051', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.03573951870203018, 'timestamp': '2025-09-30 23:01:57.131049', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.215589', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.004031107760965824, 'timestamp': '2025-09-30 23:01:57.220841', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.302731', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.026479778811335564, 'timestamp': '2025-09-30 23:01:57.318900', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.413103', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.0191289484500885, 'timestamp': '2025-09-30 23:01:57.419391', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.510091', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.021559013053774834, 'timestamp': '2025-09-30 23:01:57.522811', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.581029', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.037628013640642166, 'timestamp': '2025-09-30 23:01:57.597400', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:57.672690', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.05204259976744652, 'timestamp': '2025-09-30 23:01:57.676733', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.770725', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.016048144549131393, 'timestamp': '2025-09-30 23:01:57.778162', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.873148', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.03018929436802864, 'timestamp': '2025-09-30 23:01:57.889775', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:57.958270', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.06995926052331924, 'timestamp': '2025-09-30 23:01:57.971302', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:58.035218', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.04265674948692322, 'timestamp': '2025-09-30 23:01:58.041082', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:01:58.126182', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.021192191168665886, 'timestamp': '2025-09-30 23:01:58.142669', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:58.241709', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.013456754386425018, 'timestamp': '2025-09-30 23:01:58.252400', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:58.323273', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.055944256484508514, 'timestamp': '2025-09-30 23:01:58.334309', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:58.397543', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.03632424399256706, 'timestamp': '2025-09-30 23:01:58.402835', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:58.472699', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.03365243226289749, 'timestamp': '2025-09-30 23:01:58.478226', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:58.557228', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.011361269280314445, 'timestamp': '2025-09-30 23:01:58.577154', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:58.643556', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.06287439167499542, 'timestamp': '2025-09-30 23:01:58.650643', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:58.718776', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.00860558357089758, 'timestamp': '2025-09-30 23:01:58.722034', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:58.788139', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.040443044155836105, 'timestamp': '2025-09-30 23:01:58.802719', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:01:58.863300', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.02155563235282898, 'timestamp': '2025-09-30 23:01:58.871069', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-30 23:01:59.253112', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.310890', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.04077725484967232, 'timestamp': '2025-09-30 23:01:59.313177', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.367341', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.05014368146657944, 'timestamp': '2025-09-30 23:01:59.369876', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.424640', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.04288037493824959, 'timestamp': '2025-09-30 23:01:59.428285', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.481749', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.041461702436208725, 'timestamp': '2025-09-30 23:01:59.490065', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.568137', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.03420085087418556, 'timestamp': '2025-09-30 23:01:59.580800', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.652161', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.031672798097133636, 'timestamp': '2025-09-30 23:01:59.664283', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.755817', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.013986657373607159, 'timestamp': '2025-09-30 23:01:59.767705', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.839722', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.02602563612163067, 'timestamp': '2025-09-30 23:01:59.848618', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.915157', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.044165320694446564, 'timestamp': '2025-09-30 23:01:59.920287', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:01:59.985512', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.04699473828077316, 'timestamp': '2025-09-30 23:01:59.991923', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.058514', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.037436168640851974, 'timestamp': '2025-09-30 23:02:00.064722', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.126605', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.02789825201034546, 'timestamp': '2025-09-30 23:02:00.137082', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.207483', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.05071210488677025, 'timestamp': '2025-09-30 23:02:00.214367', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:00.281176', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.027476727962493896, 'timestamp': '2025-09-30 23:02:00.290217', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.358748', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.027793005108833313, 'timestamp': '2025-09-30 23:02:00.366960', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.443349', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.03713594749569893, 'timestamp': '2025-09-30 23:02:00.449982', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:00.511306', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.06551121920347214, 'timestamp': '2025-09-30 23:02:00.523501', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.597664', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.023583505302667618, 'timestamp': '2025-09-30 23:02:00.599988', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.666560', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.01566501520574093, 'timestamp': '2025-09-30 23:02:00.669572', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:00.722481', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.03236089274287224, 'timestamp': '2025-09-30 23:02:00.730746', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.810679', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.02470828965306282, 'timestamp': '2025-09-30 23:02:00.826269', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.897620', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.035927195101976395, 'timestamp': '2025-09-30 23:02:00.904290', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:00.978262', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.040819816291332245, 'timestamp': '2025-09-30 23:02:00.981656', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.063837', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.0317499078810215, 'timestamp': '2025-09-30 23:02:01.071272', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.138927', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.024351095780730247, 'timestamp': '2025-09-30 23:02:01.142378', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.210298', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.027607766911387444, 'timestamp': '2025-09-30 23:02:01.213317', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:01.282677', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.020753895863890648, 'timestamp': '2025-09-30 23:02:01.289905', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.365424', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.029315082356333733, 'timestamp': '2025-09-30 23:02:01.378531', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.461599', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.032960135489702225, 'timestamp': '2025-09-30 23:02:01.465378', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.530396', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.012831700034439564, 'timestamp': '2025-09-30 23:02:01.540209', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.622629', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.027494320645928383, 'timestamp': '2025-09-30 23:02:01.635543', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.706617', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.011249407194554806, 'timestamp': '2025-09-30 23:02:01.714134', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:01.784182', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.05661269649863243, 'timestamp': '2025-09-30 23:02:01.791161', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.869404', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.02596152201294899, 'timestamp': '2025-09-30 23:02:01.873108', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:01.943693', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.03298027440905571, 'timestamp': '2025-09-30 23:02:01.948764', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.029798', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.013681789860129356, 'timestamp': '2025-09-30 23:02:02.040885', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.105990', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.05078720673918724, 'timestamp': '2025-09-30 23:02:02.115944', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:02.183316', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.02189209870994091, 'timestamp': '2025-09-30 23:02:02.194536', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.277333', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.024460146203637123, 'timestamp': '2025-09-30 23:02:02.287263', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.367711', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.030637988820672035, 'timestamp': '2025-09-30 23:02:02.379749', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.448804', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.06677209585905075, 'timestamp': '2025-09-30 23:02:02.457406', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:02.521059', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.0484180673956871, 'timestamp': '2025-09-30 23:02:02.529165', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.601834', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.06334323436021805, 'timestamp': '2025-09-30 23:02:02.605847', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.670165', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.013758046552538872, 'timestamp': '2025-09-30 23:02:02.683539', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.756874', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.035871509462594986, 'timestamp': '2025-09-30 23:02:02.759893', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.829240', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.018915778025984764, 'timestamp': '2025-09-30 23:02:02.837384', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.913812', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.029085734859108925, 'timestamp': '2025-09-30 23:02:02.920783', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:02.987419', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.058769453316926956, 'timestamp': '2025-09-30 23:02:03.003890', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.085736', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.041352689266204834, 'timestamp': '2025-09-30 23:02:03.098040', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.175649', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.024846091866493225, 'timestamp': '2025-09-30 23:02:03.190215', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:03.255709', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.046214792877435684, 'timestamp': '2025-09-30 23:02:03.270395', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.348098', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.04535273462533951, 'timestamp': '2025-09-30 23:02:03.362386', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.438268', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.016473598778247833, 'timestamp': '2025-09-30 23:02:03.442444', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:03.525549', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.022456975653767586, 'timestamp': '2025-09-30 23:02:03.530051', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.598522', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.07024087756872177, 'timestamp': '2025-09-30 23:02:03.602200', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.667904', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.025472786277532578, 'timestamp': '2025-09-30 23:02:03.681627', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.744596', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.024133998900651932, 'timestamp': '2025-09-30 23:02:03.756017', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.838889', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.051206279546022415, 'timestamp': '2025-09-30 23:02:03.843429', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.909241', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.020129872485995293, 'timestamp': '2025-09-30 23:02:03.918382', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:03.986376', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.018664255738258362, 'timestamp': '2025-09-30 23:02:03.999436', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:04.085127', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.029533151537179947, 'timestamp': '2025-09-30 23:02:04.090710', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:04.163221', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.04851735383272171, 'timestamp': '2025-09-30 23:02:04.166556', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:04.232467', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.06465254724025726, 'timestamp': '2025-09-30 23:02:04.243940', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:04.316501', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.03147207200527191, 'timestamp': '2025-09-30 23:02:04.332635', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:02:10.065942', 'step': 1064, 'epoch': 1} {'type': 'pplx', 'content': 5740874.223438554, 'timestamp': '2025-09-30 23:02:10.076528', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.138927', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.04088989645242691, 'timestamp': '2025-09-30 23:02:10.144211', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.218568', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.03352085128426552, 'timestamp': '2025-09-30 23:02:10.226446', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.297755', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.008987603709101677, 'timestamp': '2025-09-30 23:02:10.301236', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.367061', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.037619855254888535, 'timestamp': '2025-09-30 23:02:10.373610', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.434021', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.0012354598147794604, 'timestamp': '2025-09-30 23:02:10.442730', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.506915', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.007791322190314531, 'timestamp': '2025-09-30 23:02:10.510874', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:10.572703', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.030384834855794907, 'timestamp': '2025-09-30 23:02:10.578301', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.648912', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.028138961642980576, 'timestamp': '2025-09-30 23:02:10.655823', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.731287', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.04276733845472336, 'timestamp': '2025-09-30 23:02:10.739835', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.809901', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.018821541219949722, 'timestamp': '2025-09-30 23:02:10.814823', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.891069', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.05837200954556465, 'timestamp': '2025-09-30 23:02:10.895945', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:10.973729', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.0898960754275322, 'timestamp': '2025-09-30 23:02:10.980724', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.044794', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.01806132309138775, 'timestamp': '2025-09-30 23:02:11.052829', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.114630', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.02343025617301464, 'timestamp': '2025-09-30 23:02:11.117842', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:11.177841', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.019483374431729317, 'timestamp': '2025-09-30 23:02:11.181495', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.259981', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.030372079461812973, 'timestamp': '2025-09-30 23:02:11.272270', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.333326', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.010448075830936432, 'timestamp': '2025-09-30 23:02:11.339321', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.401730', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.02819160558283329, 'timestamp': '2025-09-30 23:02:11.407709', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.475141', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.011298203840851784, 'timestamp': '2025-09-30 23:02:11.480150', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.545801', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.018911615014076233, 'timestamp': '2025-09-30 23:02:11.556775', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.615561', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.02146962657570839, 'timestamp': '2025-09-30 23:02:11.618322', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:11.692774', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.020229587331414223, 'timestamp': '2025-09-30 23:02:11.697945', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.759927', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.012904674746096134, 'timestamp': '2025-09-30 23:02:11.762841', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.827887', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.01742463931441307, 'timestamp': '2025-09-30 23:02:11.836284', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.890553', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.010115458630025387, 'timestamp': '2025-09-30 23:02:11.896874', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:11.956018', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.023067446425557137, 'timestamp': '2025-09-30 23:02:11.962745', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:12.024490', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.007441512309014797, 'timestamp': '2025-09-30 23:02:12.027530', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.091360', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.046797942370176315, 'timestamp': '2025-09-30 23:02:12.098890', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.182527', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.03250328078866005, 'timestamp': '2025-09-30 23:02:12.192103', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.253505', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.04190247505903244, 'timestamp': '2025-09-30 23:02:12.263950', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.342299', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.06546450406312943, 'timestamp': '2025-09-30 23:02:12.347393', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.410672', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.0296810120344162, 'timestamp': '2025-09-30 23:02:12.417402', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.479837', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.014771445654332638, 'timestamp': '2025-09-30 23:02:12.485750', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:12.543649', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.03233977407217026, 'timestamp': '2025-09-30 23:02:12.550064', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.612871', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.07005280256271362, 'timestamp': '2025-09-30 23:02:12.615707', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.675583', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.04418102279305458, 'timestamp': '2025-09-30 23:02:12.681775', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.742465', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.05845976248383522, 'timestamp': '2025-09-30 23:02:12.745910', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.814091', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.02908494509756565, 'timestamp': '2025-09-30 23:02:12.827134', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:12.887239', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.05160994082689285, 'timestamp': '2025-09-30 23:02:12.896828', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:12.963839', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.03820434957742691, 'timestamp': '2025-09-30 23:02:12.977327', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:13.035876', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.03238343447446823, 'timestamp': '2025-09-30 23:02:13.041600', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:13.104643', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.06847325712442398, 'timestamp': '2025-09-30 23:02:13.111684', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.190374', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.05524789169430733, 'timestamp': '2025-09-30 23:02:13.194913', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:13.255607', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.006753443740308285, 'timestamp': '2025-09-30 23:02:13.261773', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.327215', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.02187785878777504, 'timestamp': '2025-09-30 23:02:13.329872', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.399726', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.014764326624572277, 'timestamp': '2025-09-30 23:02:13.414263', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.489922', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.046903956681489944, 'timestamp': '2025-09-30 23:02:13.501208', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.581477', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.0349484458565712, 'timestamp': '2025-09-30 23:02:13.598682', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.695536', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.014653066173195839, 'timestamp': '2025-09-30 23:02:13.704197', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.781782', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.039411939680576324, 'timestamp': '2025-09-30 23:02:13.788436', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.856314', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.028637347742915154, 'timestamp': '2025-09-30 23:02:13.859706', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:13.932748', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.014231164939701557, 'timestamp': '2025-09-30 23:02:13.945269', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.005064', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.034404926002025604, 'timestamp': '2025-09-30 23:02:14.017506', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.096846', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.01922706700861454, 'timestamp': '2025-09-30 23:02:14.103850', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:14.169064', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.0358872152864933, 'timestamp': '2025-09-30 23:02:14.176373', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:14.248425', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.03125447779893875, 'timestamp': '2025-09-30 23:02:14.255548', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:14.330498', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.04388003796339035, 'timestamp': '2025-09-30 23:02:14.341766', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.409155', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.03313955292105675, 'timestamp': '2025-09-30 23:02:14.414796', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.482396', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.03228220343589783, 'timestamp': '2025-09-30 23:02:14.487633', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.551151', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.02109159715473652, 'timestamp': '2025-09-30 23:02:14.561502', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:14.621199', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.05286851525306702, 'timestamp': '2025-09-30 23:02:14.627631', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.714228', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.018775776028633118, 'timestamp': '2025-09-30 23:02:14.720071', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.797056', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.010919267311692238, 'timestamp': '2025-09-30 23:02:14.809227', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.886418', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.026115572080016136, 'timestamp': '2025-09-30 23:02:14.898467', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:14.957788', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.0248652882874012, 'timestamp': '2025-09-30 23:02:14.965313', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:15.041340', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.032639030367136, 'timestamp': '2025-09-30 23:02:15.045161', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.111260', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.03677981346845627, 'timestamp': '2025-09-30 23:02:15.116821', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.174948', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.02095671184360981, 'timestamp': '2025-09-30 23:02:15.182921', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.248768', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.016938140615820885, 'timestamp': '2025-09-30 23:02:15.259302', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.324521', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.03529065102338791, 'timestamp': '2025-09-30 23:02:15.327525', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.388528', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.02623806707561016, 'timestamp': '2025-09-30 23:02:15.395102', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.456692', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.04546913877129555, 'timestamp': '2025-09-30 23:02:15.463267', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:15.524876', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.025179779157042503, 'timestamp': '2025-09-30 23:02:15.536257', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.605617', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.05543552711606026, 'timestamp': '2025-09-30 23:02:15.610386', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.684721', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.015501637943089008, 'timestamp': '2025-09-30 23:02:15.688440', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.761828', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.04419197514653206, 'timestamp': '2025-09-30 23:02:15.769897', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.838950', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.03649858757853508, 'timestamp': '2025-09-30 23:02:15.848155', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:15.922086', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.04950142651796341, 'timestamp': '2025-09-30 23:02:15.930274', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.002067', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.016604851931333542, 'timestamp': '2025-09-30 23:02:16.011253', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.084569', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.016027400270104408, 'timestamp': '2025-09-30 23:02:16.099096', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.162931', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.005908586084842682, 'timestamp': '2025-09-30 23:02:16.174707', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.255395', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.0165816992521286, 'timestamp': '2025-09-30 23:02:16.260287', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.334578', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.015206768177449703, 'timestamp': '2025-09-30 23:02:16.337512', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.413935', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.049374181777238846, 'timestamp': '2025-09-30 23:02:16.420882', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.478411', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.008069846779108047, 'timestamp': '2025-09-30 23:02:16.487111', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.562743', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.0033055341336876154, 'timestamp': '2025-09-30 23:02:16.572530', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.652435', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.02729937434196472, 'timestamp': '2025-09-30 23:02:16.664318', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.744434', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.0616995170712471, 'timestamp': '2025-09-30 23:02:16.752539', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.824345', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.08432452380657196, 'timestamp': '2025-09-30 23:02:16.836801', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:16.914125', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.058393578976392746, 'timestamp': '2025-09-30 23:02:16.926525', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.004687', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.05760359764099121, 'timestamp': '2025-09-30 23:02:17.009406', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.081117', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.03107055462896824, 'timestamp': '2025-09-30 23:02:17.088727', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.149641', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.028982188552618027, 'timestamp': '2025-09-30 23:02:17.153835', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.230947', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.030082907527685165, 'timestamp': '2025-09-30 23:02:17.236502', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:17.306615', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.01635679416358471, 'timestamp': '2025-09-30 23:02:17.312078', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:17.374577', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.014785314910113811, 'timestamp': '2025-09-30 23:02:17.388783', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.471700', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.06177886947989464, 'timestamp': '2025-09-30 23:02:17.484092', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.566710', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.022266963496804237, 'timestamp': '2025-09-30 23:02:17.571148', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.646183', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.010052924044430256, 'timestamp': '2025-09-30 23:02:17.652911', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.726403', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.012803396210074425, 'timestamp': '2025-09-30 23:02:17.741773', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:17.823616', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.0282357819378376, 'timestamp': '2025-09-30 23:02:17.827063', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:17.896260', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.028171539306640625, 'timestamp': '2025-09-30 23:02:17.900433', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:17.971152', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.018063154071569443, 'timestamp': '2025-09-30 23:02:17.974646', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.054295', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.039051949977874756, 'timestamp': '2025-09-30 23:02:18.069256', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:18.140021', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.0586671307682991, 'timestamp': '2025-09-30 23:02:18.144397', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.211049', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.008575158193707466, 'timestamp': '2025-09-30 23:02:18.222374', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.307874', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.023417195305228233, 'timestamp': '2025-09-30 23:02:18.311523', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.396758', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.04033511132001877, 'timestamp': '2025-09-30 23:02:18.415112', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.477709', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.02101905085146427, 'timestamp': '2025-09-30 23:02:18.483848', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.561729', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.008245619013905525, 'timestamp': '2025-09-30 23:02:18.575182', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.662011', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.012748746201395988, 'timestamp': '2025-09-30 23:02:18.667823', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.756465', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.02177930809557438, 'timestamp': '2025-09-30 23:02:18.773354', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:18.860408', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.04741964489221573, 'timestamp': '2025-09-30 23:02:18.874504', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:18.959305', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.01199111808091402, 'timestamp': '2025-09-30 23:02:18.963938', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:02:19.033920', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.042012546211481094, 'timestamp': '2025-09-30 23:02:19.046831', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.127704', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.03810182586312294, 'timestamp': '2025-09-30 23:02:19.135370', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.218061', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.021619949489831924, 'timestamp': '2025-09-30 23:02:19.231307', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.315271', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.022060586139559746, 'timestamp': '2025-09-30 23:02:19.318838', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.403110', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.02318769320845604, 'timestamp': '2025-09-30 23:02:19.416308', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.503277', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.07476180791854858, 'timestamp': '2025-09-30 23:02:19.513022', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.598064', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.0764433816075325, 'timestamp': '2025-09-30 23:02:19.611436', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.699915', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.013418672606348991, 'timestamp': '2025-09-30 23:02:19.711880', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.795895', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.05862972512841225, 'timestamp': '2025-09-30 23:02:19.808458', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.892652', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.014108158648014069, 'timestamp': '2025-09-30 23:02:19.901011', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:19.987888', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.04747069999575615, 'timestamp': '2025-09-30 23:02:20.001715', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:20.099777', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.0027478800620883703, 'timestamp': '2025-09-30 23:02:20.114390', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:20.205126', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.05286743864417076, 'timestamp': '2025-09-30 23:02:20.218767', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:20.304991', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.03684652969241142, 'timestamp': '2025-09-30 23:02:20.313708', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:20.382767', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.03881510719656944, 'timestamp': '2025-09-30 23:02:20.395534', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:20.479321', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.040349431335926056, 'timestamp': '2025-09-30 23:02:20.482226', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:20.555263', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.03169805184006691, 'timestamp': '2025-09-30 23:02:20.566311', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:20.639084', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.04227638244628906, 'timestamp': '2025-09-30 23:02:20.653604', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:20.736684', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.055409129709005356, 'timestamp': '2025-09-30 23:02:20.750399', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:20.825792', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.028458457440137863, 'timestamp': '2025-09-30 23:02:20.829408', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:20.906430', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.008452237583696842, 'timestamp': '2025-09-30 23:02:20.924523', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.019841', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.016378233209252357, 'timestamp': '2025-09-30 23:02:21.032533', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.114169', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.06400636583566666, 'timestamp': '2025-09-30 23:02:21.130110', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.228274', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.018775062635540962, 'timestamp': '2025-09-30 23:02:21.243493', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.340089', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.03670623525977135, 'timestamp': '2025-09-30 23:02:21.357441', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.443058', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.03453930467367172, 'timestamp': '2025-09-30 23:02:21.461729', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.547864', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.04930257424712181, 'timestamp': '2025-09-30 23:02:21.551193', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.611661', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.028440413996577263, 'timestamp': '2025-09-30 23:02:21.622892', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:21.711497', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.011356881819665432, 'timestamp': '2025-09-30 23:02:21.715130', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.800027', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.04531024023890495, 'timestamp': '2025-09-30 23:02:21.816218', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:21.904165', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.02525244653224945, 'timestamp': '2025-09-30 23:02:21.908894', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:22.001870', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.04277084022760391, 'timestamp': '2025-09-30 23:02:22.006391', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:22.097724', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.04086420312523842, 'timestamp': '2025-09-30 23:02:22.103868', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:22.175338', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.017846649512648582, 'timestamp': '2025-09-30 23:02:22.194244', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:22.280581', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.031209254637360573, 'timestamp': '2025-09-30 23:02:22.284418', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:22.346939', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.0451064333319664, 'timestamp': '2025-09-30 23:02:22.352619', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:22.414530', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.023697182536125183, 'timestamp': '2025-09-30 23:02:22.418716', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:22.495476', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.04722123593091965, 'timestamp': '2025-09-30 23:02:22.511217', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:02:28.689124', 'step': 1216, 'epoch': 1} {'type': 'pplx', 'content': 6396090.934392898, 'timestamp': '2025-09-30 23:02:28.694326', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:28.750490', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.014810202643275261, 'timestamp': '2025-09-30 23:02:28.764493', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:28.843046', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.04099681228399277, 'timestamp': '2025-09-30 23:02:28.856048', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:28.926608', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.030029358342289925, 'timestamp': '2025-09-30 23:02:28.937973', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.003950', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.016647258773446083, 'timestamp': '2025-09-30 23:02:29.017753', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.100545', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.021910667419433594, 'timestamp': '2025-09-30 23:02:29.103554', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.175422', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.03162529319524765, 'timestamp': '2025-09-30 23:02:29.188402', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.277764', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.02814841829240322, 'timestamp': '2025-09-30 23:02:29.294610', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:29.396472', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.019163653254508972, 'timestamp': '2025-09-30 23:02:29.412938', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.518734', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.0316266305744648, 'timestamp': '2025-09-30 23:02:29.534440', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.627218', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.02766488492488861, 'timestamp': '2025-09-30 23:02:29.631611', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.711298', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.02636071853339672, 'timestamp': '2025-09-30 23:02:29.718637', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.816814', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.040265269577503204, 'timestamp': '2025-09-30 23:02:29.824309', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.883650', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.03249043598771095, 'timestamp': '2025-09-30 23:02:29.889463', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:29.974824', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.028440145775675774, 'timestamp': '2025-09-30 23:02:29.990716', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.053252', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.04314688220620155, 'timestamp': '2025-09-30 23:02:30.057958', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.133187', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.05125671252608299, 'timestamp': '2025-09-30 23:02:30.141288', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:30.216599', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.06298290938138962, 'timestamp': '2025-09-30 23:02:30.221839', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.279545', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.014711931347846985, 'timestamp': '2025-09-30 23:02:30.285661', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.351786', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.005673123057931662, 'timestamp': '2025-09-30 23:02:30.366088', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.438496', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.0195738784968853, 'timestamp': '2025-09-30 23:02:30.448896', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:30.531832', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.017101742327213287, 'timestamp': '2025-09-30 23:02:30.547302', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.635845', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.04808877035975456, 'timestamp': '2025-09-30 23:02:30.639718', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:30.708365', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.04543078690767288, 'timestamp': '2025-09-30 23:02:30.713298', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.793869', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.032103050500154495, 'timestamp': '2025-09-30 23:02:30.811377', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.872242', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.055129922926425934, 'timestamp': '2025-09-30 23:02:30.888211', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:30.963520', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.0537540428340435, 'timestamp': '2025-09-30 23:02:30.968172', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.052890', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.041102878749370575, 'timestamp': '2025-09-30 23:02:31.067994', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.159191', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.03105335868895054, 'timestamp': '2025-09-30 23:02:31.177652', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.246040', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.048835255205631256, 'timestamp': '2025-09-30 23:02:31.251762', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.312085', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.044188421219587326, 'timestamp': '2025-09-30 23:02:31.328073', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:02:31.421713', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.04199795052409172, 'timestamp': '2025-09-30 23:02:31.438759', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:31.524446', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.029569879174232483, 'timestamp': '2025-09-30 23:02:31.541930', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:31.623583', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.05169856920838356, 'timestamp': '2025-09-30 23:02:31.628418', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.691184', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.02208312414586544, 'timestamp': '2025-09-30 23:02:31.707702', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.780795', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.030258724465966225, 'timestamp': '2025-09-30 23:02:31.797647', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.867757', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.05788459628820419, 'timestamp': '2025-09-30 23:02:31.886569', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:31.976655', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.030675863847136497, 'timestamp': '2025-09-30 23:02:31.992941', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.086280', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.06333709508180618, 'timestamp': '2025-09-30 23:02:32.102250', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.169667', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.02253095805644989, 'timestamp': '2025-09-30 23:02:32.185349', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.278865', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.05318695306777954, 'timestamp': '2025-09-30 23:02:32.286821', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.345349', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.04992804676294327, 'timestamp': '2025-09-30 23:02:32.349062', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.432183', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.028370970860123634, 'timestamp': '2025-09-30 23:02:32.438921', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.532658', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.021120499819517136, 'timestamp': '2025-09-30 23:02:32.547577', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.642030', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.062054190784692764, 'timestamp': '2025-09-30 23:02:32.661379', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.744018', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.030207501724362373, 'timestamp': '2025-09-30 23:02:32.749669', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:32.843437', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.020355384796857834, 'timestamp': '2025-09-30 23:02:32.847612', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:32.932114', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.033509839326143265, 'timestamp': '2025-09-30 23:02:32.936308', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.009212', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.041473060846328735, 'timestamp': '2025-09-30 23:02:33.017152', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.097664', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.018560275435447693, 'timestamp': '2025-09-30 23:02:33.103599', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.163897', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.05942429229617119, 'timestamp': '2025-09-30 23:02:33.179615', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:33.253282', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.017540527507662773, 'timestamp': '2025-09-30 23:02:33.258025', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.350422', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.02088421769440174, 'timestamp': '2025-09-30 23:02:33.358278', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.452915', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.013124157674610615, 'timestamp': '2025-09-30 23:02:33.468195', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.561782', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.04011770337820053, 'timestamp': '2025-09-30 23:02:33.575710', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.649707', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.01955719292163849, 'timestamp': '2025-09-30 23:02:33.654538', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.724925', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.040954504162073135, 'timestamp': '2025-09-30 23:02:33.744446', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.819802', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.05431073531508446, 'timestamp': '2025-09-30 23:02:33.836332', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:33.898412', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.014645031653344631, 'timestamp': '2025-09-30 23:02:33.913554', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:33.985604', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.010305159725248814, 'timestamp': '2025-09-30 23:02:33.991179', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.071165', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.01199676375836134, 'timestamp': '2025-09-30 23:02:34.080730', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.152332', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.01167107094079256, 'timestamp': '2025-09-30 23:02:34.156946', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.250267', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.06565254926681519, 'timestamp': '2025-09-30 23:02:34.253757', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.317678', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.03196311369538307, 'timestamp': '2025-09-30 23:02:34.324180', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.397254', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.025031721219420433, 'timestamp': '2025-09-30 23:02:34.407297', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:02:34.478337', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.03106205351650715, 'timestamp': '2025-09-30 23:02:34.484987', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.556899', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.018876537680625916, 'timestamp': '2025-09-30 23:02:34.563715', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:34.624302', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.03105228953063488, 'timestamp': '2025-09-30 23:02:34.639949', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:34.724740', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.03092164732515812, 'timestamp': '2025-09-30 23:02:34.733107', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.805894', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.03147723525762558, 'timestamp': '2025-09-30 23:02:34.809982', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.874834', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.04682992026209831, 'timestamp': '2025-09-30 23:02:34.880010', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:34.949001', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.012937872670590878, 'timestamp': '2025-09-30 23:02:34.952348', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:35.026906', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.007133714854717255, 'timestamp': '2025-09-30 23:02:35.035670', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:35.121784', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.04782027006149292, 'timestamp': '2025-09-30 23:02:35.133635', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:35.209249', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.06537818163633347, 'timestamp': '2025-09-30 23:02:35.214445', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:35.286258', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.08201859146356583, 'timestamp': '2025-09-30 23:02:35.291013', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:35.378510', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.06334369629621506, 'timestamp': '2025-09-30 23:02:35.386362', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:35.474994', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.023249315097928047, 'timestamp': '2025-09-30 23:02:35.480895', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:35.549558', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.07157095521688461, 'timestamp': '2025-09-30 23:02:35.562561', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:35.644225', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.03932400792837143, 'timestamp': '2025-09-30 23:02:35.648207', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:35.719520', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.028652625158429146, 'timestamp': '2025-09-30 23:02:35.738774', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:35.826737', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.019638454541563988, 'timestamp': '2025-09-30 23:02:35.833379', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:35.919820', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.010104341432452202, 'timestamp': '2025-09-30 23:02:35.935687', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.003902', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.0169498510658741, 'timestamp': '2025-09-30 23:02:36.016862', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:36.104246', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.020724382251501083, 'timestamp': '2025-09-30 23:02:36.114236', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.181163', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.028636718168854713, 'timestamp': '2025-09-30 23:02:36.196830', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.264297', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.040781427174806595, 'timestamp': '2025-09-30 23:02:36.278286', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.366141', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.017747025936841965, 'timestamp': '2025-09-30 23:02:36.380239', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:36.464501', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.006385589484125376, 'timestamp': '2025-09-30 23:02:36.483527', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.541978', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.024638313800096512, 'timestamp': '2025-09-30 23:02:36.556046', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.643959', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.0015924833714962006, 'timestamp': '2025-09-30 23:02:36.658173', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.739550', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.021432461217045784, 'timestamp': '2025-09-30 23:02:36.750786', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:36.834015', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.01792985573410988, 'timestamp': '2025-09-30 23:02:36.850609', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:36.921219', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.06487541645765305, 'timestamp': '2025-09-30 23:02:36.930549', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:36.991269', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.062438108026981354, 'timestamp': '2025-09-30 23:02:36.996230', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:37.068499', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.03815298527479172, 'timestamp': '2025-09-30 23:02:37.083981', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:37.172717', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.019075904041528702, 'timestamp': '2025-09-30 23:02:37.195602', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:37.293049', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.0043448833748698235, 'timestamp': '2025-09-30 23:02:37.307630', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:37.376275', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.03282402083277702, 'timestamp': '2025-09-30 23:02:37.379961', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:37.460336', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.0347294956445694, 'timestamp': '2025-09-30 23:02:37.475698', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:37.533102', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.07435228675603867, 'timestamp': '2025-09-30 23:02:37.541342', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:37.608972', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.003961631096899509, 'timestamp': '2025-09-30 23:02:37.613287', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:37.690553', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.047447558492422104, 'timestamp': '2025-09-30 23:02:37.701477', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:37.769225', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.0026749682147055864, 'timestamp': '2025-09-30 23:02:37.778741', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:37.887026', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.0739743784070015, 'timestamp': '2025-09-30 23:02:37.901552', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:37.971139', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.05812511965632439, 'timestamp': '2025-09-30 23:02:37.986128', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.053913', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.02982259728014469, 'timestamp': '2025-09-30 23:02:38.066789', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:38.142299', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.05156226083636284, 'timestamp': '2025-09-30 23:02:38.147384', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.221671', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.025317931547760963, 'timestamp': '2025-09-30 23:02:38.230542', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.288553', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.014323639683425426, 'timestamp': '2025-09-30 23:02:38.294665', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.363183', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.02622639574110508, 'timestamp': '2025-09-30 23:02:38.368412', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.449364', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.027728160843253136, 'timestamp': '2025-09-30 23:02:38.453762', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:38.522777', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.015366262756288052, 'timestamp': '2025-09-30 23:02:38.532064', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.620865', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.0345725379884243, 'timestamp': '2025-09-30 23:02:38.629315', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.728298', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.04088530316948891, 'timestamp': '2025-09-30 23:02:38.737581', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.808201', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.046113405376672745, 'timestamp': '2025-09-30 23:02:38.816449', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.896753', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.041325248777866364, 'timestamp': '2025-09-30 23:02:38.915352', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:38.972662', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.008489655330777168, 'timestamp': '2025-09-30 23:02:38.983616', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:39.058370', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.03681110218167305, 'timestamp': '2025-09-30 23:02:39.066086', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:39.147740', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.05044196918606758, 'timestamp': '2025-09-30 23:02:39.152967', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:39.219077', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.03448326513171196, 'timestamp': '2025-09-30 23:02:39.232770', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:39.305804', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.03736647218465805, 'timestamp': '2025-09-30 23:02:39.310588', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:39.380634', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.04798685759305954, 'timestamp': '2025-09-30 23:02:39.384604', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:39.462251', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.03883186727762222, 'timestamp': '2025-09-30 23:02:39.466766', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:39.535407', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.07364342361688614, 'timestamp': '2025-09-30 23:02:39.545922', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:39.628683', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.03889190033078194, 'timestamp': '2025-09-30 23:02:39.643018', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:39.737410', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.020309949293732643, 'timestamp': '2025-09-30 23:02:39.742263', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:39.831018', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.0280343908816576, 'timestamp': '2025-09-30 23:02:39.847827', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:39.938106', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.03078913688659668, 'timestamp': '2025-09-30 23:02:39.956241', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.044727', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.01863262429833412, 'timestamp': '2025-09-30 23:02:40.057580', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.145640', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.060617413371801376, 'timestamp': '2025-09-30 23:02:40.158703', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.243543', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.024311697110533714, 'timestamp': '2025-09-30 23:02:40.248192', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.330772', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.03749660402536392, 'timestamp': '2025-09-30 23:02:40.345527', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.423522', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.020641924813389778, 'timestamp': '2025-09-30 23:02:40.427502', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:40.489233', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.012494887225329876, 'timestamp': '2025-09-30 23:02:40.501387', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.577280', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.036680303514003754, 'timestamp': '2025-09-30 23:02:40.593946', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.691704', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.028162196278572083, 'timestamp': '2025-09-30 23:02:40.713163', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:40.788804', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.034411728382110596, 'timestamp': '2025-09-30 23:02:40.802498', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.880737', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.05305245518684387, 'timestamp': '2025-09-30 23:02:40.894904', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:40.984244', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.03589671105146408, 'timestamp': '2025-09-30 23:02:40.999414', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:41.060320', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.025975940749049187, 'timestamp': '2025-09-30 23:02:41.070508', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:41.148190', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.03293853625655174, 'timestamp': '2025-09-30 23:02:41.158540', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:41.240308', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.05328277498483658, 'timestamp': '2025-09-30 23:02:41.245034', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:41.309807', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.028315136209130287, 'timestamp': '2025-09-30 23:02:41.323811', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:41.396695', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.01787687838077545, 'timestamp': '2025-09-30 23:02:41.404715', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:41.465032', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.03949100151658058, 'timestamp': '2025-09-30 23:02:41.468537', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:02:41.543391', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.03438861668109894, 'timestamp': '2025-09-30 23:02:41.550671', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:41.617490', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.02649403177201748, 'timestamp': '2025-09-30 23:02:41.621919', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:41.683276', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.017517726868391037, 'timestamp': '2025-09-30 23:02:41.690618', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:41.749493', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.027495095506310463, 'timestamp': '2025-09-30 23:02:41.752744', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:41.827181', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.024953704327344894, 'timestamp': '2025-09-30 23:02:41.839214', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:41.907584', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.03166994825005531, 'timestamp': '2025-09-30 23:02:41.917898', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:41.975466', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.02836683951318264, 'timestamp': '2025-09-30 23:02:41.991216', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:02:46.922755', 'step': 1368, 'epoch': 1} {'type': 'pplx', 'content': 7358881.3050526, 'timestamp': '2025-09-30 23:02:46.927339', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:46.988201', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.061190176755189896, 'timestamp': '2025-09-30 23:02:46.994353', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.065349', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.038190267980098724, 'timestamp': '2025-09-30 23:02:47.076294', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:47.138848', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.020524710416793823, 'timestamp': '2025-09-30 23:02:47.145927', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.214410', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.043021757155656815, 'timestamp': '2025-09-30 23:02:47.225775', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.300000', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.03539930656552315, 'timestamp': '2025-09-30 23:02:47.308316', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.380555', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.03017817810177803, 'timestamp': '2025-09-30 23:02:47.384311', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.456619', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.02290624938905239, 'timestamp': '2025-09-30 23:02:47.465718', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.536940', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.027828166261315346, 'timestamp': '2025-09-30 23:02:47.550520', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.623464', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.04250097647309303, 'timestamp': '2025-09-30 23:02:47.627912', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.790880', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.04765627905726433, 'timestamp': '2025-09-30 23:02:47.794972', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:47.873241', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.027691150084137917, 'timestamp': '2025-09-30 23:02:47.877369', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:47.947175', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.07987367361783981, 'timestamp': '2025-09-30 23:02:47.958121', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.016732', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.05407799407839775, 'timestamp': '2025-09-30 23:02:48.020083', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.088815', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.045732613652944565, 'timestamp': '2025-09-30 23:02:48.097948', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.158368', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.032295700162649155, 'timestamp': '2025-09-30 23:02:48.171823', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.232718', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.018557783216238022, 'timestamp': '2025-09-30 23:02:48.241015', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.314557', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.011694935150444508, 'timestamp': '2025-09-30 23:02:48.321459', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:48.399820', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.009439828805625439, 'timestamp': '2025-09-30 23:02:48.413079', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.488496', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.014965587295591831, 'timestamp': '2025-09-30 23:02:48.496522', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.572543', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.03077077679336071, 'timestamp': '2025-09-30 23:02:48.587678', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.660021', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.010819890536367893, 'timestamp': '2025-09-30 23:02:48.670775', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.742498', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.0248111542314291, 'timestamp': '2025-09-30 23:02:48.754542', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.820171', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.0428164079785347, 'timestamp': '2025-09-30 23:02:48.825233', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.888753', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.018500808626413345, 'timestamp': '2025-09-30 23:02:48.897911', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:48.978587', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.02890787646174431, 'timestamp': '2025-09-30 23:02:48.987547', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.058153', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.008713687770068645, 'timestamp': '2025-09-30 23:02:49.062478', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.131232', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.019261494278907776, 'timestamp': '2025-09-30 23:02:49.143001', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:49.209792', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.02706124447286129, 'timestamp': '2025-09-30 23:02:49.225445', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.290884', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.006588311400264502, 'timestamp': '2025-09-30 23:02:49.303983', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.365618', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.015812495723366737, 'timestamp': '2025-09-30 23:02:49.369386', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.439773', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.016767334192991257, 'timestamp': '2025-09-30 23:02:49.450802', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.516919', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.004952180664986372, 'timestamp': '2025-09-30 23:02:49.529216', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.616172', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.016310686245560646, 'timestamp': '2025-09-30 23:02:49.629633', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:49.709764', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.03767526149749756, 'timestamp': '2025-09-30 23:02:49.714945', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.785072', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.021010611206293106, 'timestamp': '2025-09-30 23:02:49.795076', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.878465', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.02332903817296028, 'timestamp': '2025-09-30 23:02:49.894161', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:49.957295', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.07125699520111084, 'timestamp': '2025-09-30 23:02:49.970526', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:50.031479', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.012547260150313377, 'timestamp': '2025-09-30 23:02:50.044118', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.115024', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.06436596810817719, 'timestamp': '2025-09-30 23:02:50.119257', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.181404', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.02093428000807762, 'timestamp': '2025-09-30 23:02:50.198094', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:50.262500', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.0933912992477417, 'timestamp': '2025-09-30 23:02:50.277898', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.339851', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.004238645080476999, 'timestamp': '2025-09-30 23:02:50.347997', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.404704', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.06156480684876442, 'timestamp': '2025-09-30 23:02:50.409472', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.468624', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.02082459069788456, 'timestamp': '2025-09-30 23:02:50.475289', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:50.545324', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.017574504017829895, 'timestamp': '2025-09-30 23:02:50.554713', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.645446', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.042238473892211914, 'timestamp': '2025-09-30 23:02:50.649841', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.712054', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.033252689987421036, 'timestamp': '2025-09-30 23:02:50.720121', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.796025', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.037780143320560455, 'timestamp': '2025-09-30 23:02:50.809648', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.881977', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.019785547628998756, 'timestamp': '2025-09-30 23:02:50.892994', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:50.959658', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.010988726280629635, 'timestamp': '2025-09-30 23:02:50.970736', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.053606', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.0237441249191761, 'timestamp': '2025-09-30 23:02:51.056903', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.134590', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.008621657267212868, 'timestamp': '2025-09-30 23:02:51.144386', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.249500', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.030309442430734634, 'timestamp': '2025-09-30 23:02:51.252702', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.317945', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.025275666266679764, 'timestamp': '2025-09-30 23:02:51.323393', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:51.389270', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.05504307150840759, 'timestamp': '2025-09-30 23:02:51.398871', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.467898', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.03992205858230591, 'timestamp': '2025-09-30 23:02:51.481024', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.562089', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.005670011043548584, 'timestamp': '2025-09-30 23:02:51.575403', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.645014', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.07730435580015182, 'timestamp': '2025-09-30 23:02:51.659243', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.742106', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.06256470829248428, 'timestamp': '2025-09-30 23:02:51.747659', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.814840', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.02508869394659996, 'timestamp': '2025-09-30 23:02:51.823366', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:51.885361', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.04527953267097473, 'timestamp': '2025-09-30 23:02:51.901179', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:51.987882', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.05237665772438049, 'timestamp': '2025-09-30 23:02:52.003124', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.093701', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.0036436785012483597, 'timestamp': '2025-09-30 23:02:52.097477', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.161512', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.006111314985901117, 'timestamp': '2025-09-30 23:02:52.174035', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.252669', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.06441114097833633, 'timestamp': '2025-09-30 23:02:52.265403', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.345887', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.03531522676348686, 'timestamp': '2025-09-30 23:02:52.349090', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.419284', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.031131308525800705, 'timestamp': '2025-09-30 23:02:52.430932', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.503507', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.029308075085282326, 'timestamp': '2025-09-30 23:02:52.516592', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.589692', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.016309870406985283, 'timestamp': '2025-09-30 23:02:52.594507', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.659723', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.0035005342215299606, 'timestamp': '2025-09-30 23:02:52.668876', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.740834', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.012909573502838612, 'timestamp': '2025-09-30 23:02:52.753376', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:52.838609', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.028114523738622665, 'timestamp': '2025-09-30 23:02:52.856382', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:52.943896', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.01388208381831646, 'timestamp': '2025-09-30 23:02:52.948203', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.019545', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.05473855510354042, 'timestamp': '2025-09-30 23:02:53.030150', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.107015', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.0516105480492115, 'timestamp': '2025-09-30 23:02:53.115876', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.195449', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.015500868670642376, 'timestamp': '2025-09-30 23:02:53.209246', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.290102', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.08433102071285248, 'timestamp': '2025-09-30 23:02:53.294984', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:53.351596', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.025134149938821793, 'timestamp': '2025-09-30 23:02:53.362350', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:53.436229', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.03218942880630493, 'timestamp': '2025-09-30 23:02:53.445723', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.524826', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.05246192216873169, 'timestamp': '2025-09-30 23:02:53.542296', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.614736', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.05491900071501732, 'timestamp': '2025-09-30 23:02:53.624055', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.701769', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.026489872485399246, 'timestamp': '2025-09-30 23:02:53.712956', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.780057', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.016705866903066635, 'timestamp': '2025-09-30 23:02:53.791207', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.866840', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.0048500606790184975, 'timestamp': '2025-09-30 23:02:53.880314', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:53.942610', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.0072847167029976845, 'timestamp': '2025-09-30 23:02:53.953653', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.022810', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.015470330603420734, 'timestamp': '2025-09-30 23:02:54.031450', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.088144', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.022549878805875778, 'timestamp': '2025-09-30 23:02:54.096383', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:54.151541', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.024478182196617126, 'timestamp': '2025-09-30 23:02:54.168655', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.246518', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.037737004458904266, 'timestamp': '2025-09-30 23:02:54.257602', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.343114', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.036099787801504135, 'timestamp': '2025-09-30 23:02:54.352222', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.429231', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.03574643284082413, 'timestamp': '2025-09-30 23:02:54.437417', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.510272', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.02653498761355877, 'timestamp': '2025-09-30 23:02:54.521736', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:54.585060', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.016182774677872658, 'timestamp': '2025-09-30 23:02:54.594641', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.651817', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.057852912694215775, 'timestamp': '2025-09-30 23:02:54.655889', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.710601', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.018269706517457962, 'timestamp': '2025-09-30 23:02:54.721841', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.784803', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.01571071892976761, 'timestamp': '2025-09-30 23:02:54.792662', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.847640', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.014124986715614796, 'timestamp': '2025-09-30 23:02:54.850777', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:54.908646', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.08831469714641571, 'timestamp': '2025-09-30 23:02:54.911815', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:54.967935', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.028587710112333298, 'timestamp': '2025-09-30 23:02:54.971780', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.026660', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.010386691428720951, 'timestamp': '2025-09-30 23:02:55.040566', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.115348', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.012291979975998402, 'timestamp': '2025-09-30 23:02:55.127137', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.198115', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.020621582865715027, 'timestamp': '2025-09-30 23:02:55.201997', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.265722', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.03788720816373825, 'timestamp': '2025-09-30 23:02:55.273498', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:55.340496', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.06497042626142502, 'timestamp': '2025-09-30 23:02:55.350833', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.423045', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.03621384873986244, 'timestamp': '2025-09-30 23:02:55.426878', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.482500', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.02207300066947937, 'timestamp': '2025-09-30 23:02:55.485736', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.542646', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.03936600312590599, 'timestamp': '2025-09-30 23:02:55.547238', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:55.604350', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.014081784524023533, 'timestamp': '2025-09-30 23:02:55.611124', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.670934', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.011886038817465305, 'timestamp': '2025-09-30 23:02:55.675013', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:55.737550', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.03396204113960266, 'timestamp': '2025-09-30 23:02:55.744231', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.813238', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.03306568041443825, 'timestamp': '2025-09-30 23:02:55.820233', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.882520', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.038977619260549545, 'timestamp': '2025-09-30 23:02:55.896656', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:55.962321', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.025107797235250473, 'timestamp': '2025-09-30 23:02:55.967021', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.037579', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.02001824416220188, 'timestamp': '2025-09-30 23:02:56.040594', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.094848', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.010101823136210442, 'timestamp': '2025-09-30 23:02:56.097707', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:56.151327', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.02949589118361473, 'timestamp': '2025-09-30 23:02:56.157308', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.212154', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.04242183640599251, 'timestamp': '2025-09-30 23:02:56.219677', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.279872', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.027769023552536964, 'timestamp': '2025-09-30 23:02:56.282155', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.346059', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.020919788628816605, 'timestamp': '2025-09-30 23:02:56.350048', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:56.404898', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.016232851892709732, 'timestamp': '2025-09-30 23:02:56.410894', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.464928', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.03370091691613197, 'timestamp': '2025-09-30 23:02:56.467177', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.523433', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.04284954071044922, 'timestamp': '2025-09-30 23:02:56.530675', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.589433', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.0748000368475914, 'timestamp': '2025-09-30 23:02:56.592824', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:56.648149', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.03501405939459801, 'timestamp': '2025-09-30 23:02:56.654303', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.706632', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.02654908038675785, 'timestamp': '2025-09-30 23:02:56.711257', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.765570', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.026674656197428703, 'timestamp': '2025-09-30 23:02:56.767992', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.820000', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.019559619948267937, 'timestamp': '2025-09-30 23:02:56.823032', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.877364', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.02629818022251129, 'timestamp': '2025-09-30 23:02:56.885348', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:56.956072', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.03586937114596367, 'timestamp': '2025-09-30 23:02:56.963187', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:02:57.035562', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.02705039456486702, 'timestamp': '2025-09-30 23:02:57.042930', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:57.110546', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.021071193739771843, 'timestamp': '2025-09-30 23:02:57.114623', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:57.174844', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.019392939284443855, 'timestamp': '2025-09-30 23:02:57.182792', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-30 23:02:57.758136', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:57.816857', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.01612907089293003, 'timestamp': '2025-09-30 23:02:57.821223', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:57.890790', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.0278293639421463, 'timestamp': '2025-09-30 23:02:57.894794', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:57.953227', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.011336848139762878, 'timestamp': '2025-09-30 23:02:57.957234', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.016564', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.03859808295965195, 'timestamp': '2025-09-30 23:02:58.025004', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.085058', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.031872037798166275, 'timestamp': '2025-09-30 23:02:58.092829', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.167030', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.016442347317934036, 'timestamp': '2025-09-30 23:02:58.175052', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.245129', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.04914351925253868, 'timestamp': '2025-09-30 23:02:58.251049', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.321614', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.011031745001673698, 'timestamp': '2025-09-30 23:02:58.330230', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.389069', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.07075849175453186, 'timestamp': '2025-09-30 23:02:58.398393', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.463727', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.05661085247993469, 'timestamp': '2025-09-30 23:02:58.466490', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:02:58.521665', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.053298771381378174, 'timestamp': '2025-09-30 23:02:58.524673', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.583697', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.05211111903190613, 'timestamp': '2025-09-30 23:02:58.592747', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.654594', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.0654623880982399, 'timestamp': '2025-09-30 23:02:58.658094', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.715259', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.04350731894373894, 'timestamp': '2025-09-30 23:02:58.718536', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.783552', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.02442876622080803, 'timestamp': '2025-09-30 23:02:58.788720', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.852707', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.007539746817201376, 'timestamp': '2025-09-30 23:02:58.862437', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.927242', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.007932565174996853, 'timestamp': '2025-09-30 23:02:58.935550', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:58.996708', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.019116252660751343, 'timestamp': '2025-09-30 23:02:59.005430', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:59.064375', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.020596830174326897, 'timestamp': '2025-09-30 23:02:59.067282', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:02:59.127759', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.0324707105755806, 'timestamp': '2025-09-30 23:02:59.135026', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:03:03.664766', 'step': 1520, 'epoch': 1} {'type': 'pplx', 'content': 6459038.506849336, 'timestamp': '2025-09-30 23:03:03.675233', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:03.735742', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.030528707429766655, 'timestamp': '2025-09-30 23:03:03.744400', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:03.813541', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.018094196915626526, 'timestamp': '2025-09-30 23:03:03.820004', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:03.886252', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.02636393904685974, 'timestamp': '2025-09-30 23:03:03.890440', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:03.961152', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.046948887407779694, 'timestamp': '2025-09-30 23:03:03.969327', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.036552', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.01916453428566456, 'timestamp': '2025-09-30 23:03:04.046624', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.115893', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.09148339182138443, 'timestamp': '2025-09-30 23:03:04.121348', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.185836', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.035028934478759766, 'timestamp': '2025-09-30 23:03:04.194465', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.270400', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.08074255287647247, 'timestamp': '2025-09-30 23:03:04.282171', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:04.355084', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.034997597336769104, 'timestamp': '2025-09-30 23:03:04.363152', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.429532', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.014126596041023731, 'timestamp': '2025-09-30 23:03:04.435779', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.502616', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.0425933375954628, 'timestamp': '2025-09-30 23:03:04.511607', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:04.581613', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.04201146587729454, 'timestamp': '2025-09-30 23:03:04.591332', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.656785', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.01827825792133808, 'timestamp': '2025-09-30 23:03:04.664314', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.734857', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.02849983051419258, 'timestamp': '2025-09-30 23:03:04.741070', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.816549', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.042749322950839996, 'timestamp': '2025-09-30 23:03:04.829837', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.911068', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.029860462993383408, 'timestamp': '2025-09-30 23:03:04.923249', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:04.997838', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.028510957956314087, 'timestamp': '2025-09-30 23:03:05.009626', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.067657', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.014252357184886932, 'timestamp': '2025-09-30 23:03:05.075926', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.145011', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.04738182574510574, 'timestamp': '2025-09-30 23:03:05.155003', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:05.218437', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.02862784080207348, 'timestamp': '2025-09-30 23:03:05.229557', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:05.288931', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.03745010122656822, 'timestamp': '2025-09-30 23:03:05.295044', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.361257', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.015365165658295155, 'timestamp': '2025-09-30 23:03:05.370607', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.433788', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.003740377491340041, 'timestamp': '2025-09-30 23:03:05.443399', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.514046', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.03813201189041138, 'timestamp': '2025-09-30 23:03:05.525245', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.581149', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.04496752470731735, 'timestamp': '2025-09-30 23:03:05.585599', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.641435', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.025898387655615807, 'timestamp': '2025-09-30 23:03:05.644607', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.701203', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.05580626055598259, 'timestamp': '2025-09-30 23:03:05.706202', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.771067', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.01795743778347969, 'timestamp': '2025-09-30 23:03:05.778918', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.834182', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.025162050500512123, 'timestamp': '2025-09-30 23:03:05.839422', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:05.908965', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.04131416603922844, 'timestamp': '2025-09-30 23:03:05.916711', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:05.981010', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.014204987324774265, 'timestamp': '2025-09-30 23:03:05.987934', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.046357', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.03304540738463402, 'timestamp': '2025-09-30 23:03:06.055435', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.117529', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.03872261568903923, 'timestamp': '2025-09-30 23:03:06.127237', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:06.201183', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.024706631898880005, 'timestamp': '2025-09-30 23:03:06.208866', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.273711', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.04876335710287094, 'timestamp': '2025-09-30 23:03:06.280574', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.353067', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.04908549785614014, 'timestamp': '2025-09-30 23:03:06.368427', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:06.428473', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.043131351470947266, 'timestamp': '2025-09-30 23:03:06.434138', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.497115', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.031885478645563126, 'timestamp': '2025-09-30 23:03:06.505604', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.565925', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.02842784859240055, 'timestamp': '2025-09-30 23:03:06.571728', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.639581', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.029432913288474083, 'timestamp': '2025-09-30 23:03:06.647320', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:06.705357', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.015076769515872002, 'timestamp': '2025-09-30 23:03:06.709492', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.776707', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.019198885187506676, 'timestamp': '2025-09-30 23:03:06.782212', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:06.847687', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.04002996161580086, 'timestamp': '2025-09-30 23:03:06.852442', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 23:03:06.926355', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.037930551916360855, 'timestamp': '2025-09-30 23:03:06.937609', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:06.999533', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.05028128623962402, 'timestamp': '2025-09-30 23:03:07.011720', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.077016', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.026081500574946404, 'timestamp': '2025-09-30 23:03:07.081997', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.142412', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.01670665852725506, 'timestamp': '2025-09-30 23:03:07.148682', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.207928', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.035604458302259445, 'timestamp': '2025-09-30 23:03:07.218729', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.289843', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.05987481027841568, 'timestamp': '2025-09-30 23:03:07.293467', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.363422', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.01926838792860508, 'timestamp': '2025-09-30 23:03:07.376837', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.460569', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.03912753984332085, 'timestamp': '2025-09-30 23:03:07.473068', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.545358', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.015300449915230274, 'timestamp': '2025-09-30 23:03:07.553130', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:07.627154', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.06443975120782852, 'timestamp': '2025-09-30 23:03:07.631738', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:07.690025', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.03114987537264824, 'timestamp': '2025-09-30 23:03:07.705160', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.777323', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.05294283479452133, 'timestamp': '2025-09-30 23:03:07.781854', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.841996', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.017614956945180893, 'timestamp': '2025-09-30 23:03:07.862846', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:07.967024', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.04652811214327812, 'timestamp': '2025-09-30 23:03:07.984554', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:08.073058', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.02910836972296238, 'timestamp': '2025-09-30 23:03:08.088229', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:08.174386', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.0316791757941246, 'timestamp': '2025-09-30 23:03:08.186869', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.262030', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.030888568609952927, 'timestamp': '2025-09-30 23:03:08.270720', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.335959', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.034852366894483566, 'timestamp': '2025-09-30 23:03:08.340040', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.396597', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.06324341148138046, 'timestamp': '2025-09-30 23:03:08.400314', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.465264', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.012672132812440395, 'timestamp': '2025-09-30 23:03:08.469648', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.524914', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.048694539815187454, 'timestamp': '2025-09-30 23:03:08.533458', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.587876', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.0379454605281353, 'timestamp': '2025-09-30 23:03:08.592401', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:08.650227', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.043192096054553986, 'timestamp': '2025-09-30 23:03:08.653514', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.709259', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.04910615459084511, 'timestamp': '2025-09-30 23:03:08.713478', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:08.770254', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.03337354585528374, 'timestamp': '2025-09-30 23:03:08.782740', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.855571', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.03470296412706375, 'timestamp': '2025-09-30 23:03:08.861018', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.922911', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.031497370451688766, 'timestamp': '2025-09-30 23:03:08.925604', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:08.980767', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.049098189920186996, 'timestamp': '2025-09-30 23:03:08.988139', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.059086', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.04068639874458313, 'timestamp': '2025-09-30 23:03:09.066217', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.119381', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.034401845186948776, 'timestamp': '2025-09-30 23:03:09.123087', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.191091', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.030767086893320084, 'timestamp': '2025-09-30 23:03:09.203913', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:09.273928', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.047466058284044266, 'timestamp': '2025-09-30 23:03:09.276601', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.331932', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.03512118384242058, 'timestamp': '2025-09-30 23:03:09.340236', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.400687', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.025132041424512863, 'timestamp': '2025-09-30 23:03:09.403811', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.462419', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.019533613696694374, 'timestamp': '2025-09-30 23:03:09.469074', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:09.525336', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.03494391590356827, 'timestamp': '2025-09-30 23:03:09.527916', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.589676', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.011944936588406563, 'timestamp': '2025-09-30 23:03:09.602016', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:09.678013', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.03347208350896835, 'timestamp': '2025-09-30 23:03:09.687335', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.759018', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.03759691119194031, 'timestamp': '2025-09-30 23:03:09.766891', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.831141', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.023153407499194145, 'timestamp': '2025-09-30 23:03:09.837393', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.906302', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.04647021368145943, 'timestamp': '2025-09-30 23:03:09.913826', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:09.977345', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.04052305221557617, 'timestamp': '2025-09-30 23:03:09.981664', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.036750', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.012628245167434216, 'timestamp': '2025-09-30 23:03:10.040887', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.097782', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.04314271733164787, 'timestamp': '2025-09-30 23:03:10.102883', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.169395', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.008048886433243752, 'timestamp': '2025-09-30 23:03:10.175948', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.231349', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.023121235892176628, 'timestamp': '2025-09-30 23:03:10.236352', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:10.300363', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.023563113063573837, 'timestamp': '2025-09-30 23:03:10.302955', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.357268', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.03834856301546097, 'timestamp': '2025-09-30 23:03:10.363543', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:10.422444', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.04680116847157478, 'timestamp': '2025-09-30 23:03:10.431643', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.497074', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.014328931458294392, 'timestamp': '2025-09-30 23:03:10.499724', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.557402', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.03230210021138191, 'timestamp': '2025-09-30 23:03:10.560017', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.631954', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.04218883439898491, 'timestamp': '2025-09-30 23:03:10.636599', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.703005', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.035938866436481476, 'timestamp': '2025-09-30 23:03:10.710507', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.772048', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.051642995327711105, 'timestamp': '2025-09-30 23:03:10.780731', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.845641', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.06518284976482391, 'timestamp': '2025-09-30 23:03:10.847857', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.909688', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.04437375068664551, 'timestamp': '2025-09-30 23:03:10.922420', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:10.978524', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.021429304033517838, 'timestamp': '2025-09-30 23:03:10.984608', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.047822', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.017382029443979263, 'timestamp': '2025-09-30 23:03:11.057733', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.132157', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.017097970470786095, 'timestamp': '2025-09-30 23:03:11.138021', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:11.195989', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.04476139694452286, 'timestamp': '2025-09-30 23:03:11.208142', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.278007', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.0689309686422348, 'timestamp': '2025-09-30 23:03:11.285393', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.342007', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.01900097168982029, 'timestamp': '2025-09-30 23:03:11.345167', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.404259', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.0377378948032856, 'timestamp': '2025-09-30 23:03:11.408390', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.467577', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.0333823524415493, 'timestamp': '2025-09-30 23:03:11.473721', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:11.539359', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.0567627027630806, 'timestamp': '2025-09-30 23:03:11.554256', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.618241', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.04223215952515602, 'timestamp': '2025-09-30 23:03:11.623673', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.702362', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.007516287267208099, 'timestamp': '2025-09-30 23:03:11.714701', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.784477', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.010016054846346378, 'timestamp': '2025-09-30 23:03:11.789901', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.868072', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.03352132812142372, 'timestamp': '2025-09-30 23:03:11.876340', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:11.964357', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.013317577540874481, 'timestamp': '2025-09-30 23:03:11.977335', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.049395', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.0379173569381237, 'timestamp': '2025-09-30 23:03:12.059866', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:12.133391', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.021042993292212486, 'timestamp': '2025-09-30 23:03:12.141389', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.201998', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.026383819058537483, 'timestamp': '2025-09-30 23:03:12.214409', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.272209', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.049220066517591476, 'timestamp': '2025-09-30 23:03:12.279932', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.341969', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.029694635421037674, 'timestamp': '2025-09-30 23:03:12.349174', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.430882', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.00874552596360445, 'timestamp': '2025-09-30 23:03:12.442792', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.511631', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.02213328517973423, 'timestamp': '2025-09-30 23:03:12.520411', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.582848', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.020082278177142143, 'timestamp': '2025-09-30 23:03:12.585191', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.657189', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.02259869873523712, 'timestamp': '2025-09-30 23:03:12.668480', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.743888', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.05221184343099594, 'timestamp': '2025-09-30 23:03:12.754382', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.832843', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.03698321431875229, 'timestamp': '2025-09-30 23:03:12.841605', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:12.898407', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.018465790897607803, 'timestamp': '2025-09-30 23:03:12.907951', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:12.977699', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.01730368472635746, 'timestamp': '2025-09-30 23:03:12.981558', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.045681', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.02592948079109192, 'timestamp': '2025-09-30 23:03:13.056683', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.131160', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.05269220098853111, 'timestamp': '2025-09-30 23:03:13.142244', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.213993', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.020737919956445694, 'timestamp': '2025-09-30 23:03:13.224206', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.297564', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.02915198728442192, 'timestamp': '2025-09-30 23:03:13.314724', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.392172', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.026531782001256943, 'timestamp': '2025-09-30 23:03:13.396929', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.466931', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.012378302402794361, 'timestamp': '2025-09-30 23:03:13.476298', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.536824', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.07074840366840363, 'timestamp': '2025-09-30 23:03:13.541648', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:13.607962', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.015609714202582836, 'timestamp': '2025-09-30 23:03:13.612854', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.677465', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.03183326870203018, 'timestamp': '2025-09-30 23:03:13.680558', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.742202', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.04289180785417557, 'timestamp': '2025-09-30 23:03:13.755188', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.819736', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.03881553187966347, 'timestamp': '2025-09-30 23:03:13.829543', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:13.887916', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.009066164493560791, 'timestamp': '2025-09-30 23:03:13.901072', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:13.972894', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.04151797294616699, 'timestamp': '2025-09-30 23:03:13.978505', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.036434', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.04393450543284416, 'timestamp': '2025-09-30 23:03:14.051063', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:14.120423', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.020039493218064308, 'timestamp': '2025-09-30 23:03:14.132486', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.204633', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.04237166419625282, 'timestamp': '2025-09-30 23:03:14.213014', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.285556', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.019750680774450302, 'timestamp': '2025-09-30 23:03:14.295177', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.373436', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.028089897707104683, 'timestamp': '2025-09-30 23:03:14.381082', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.440608', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.0023849087301641703, 'timestamp': '2025-09-30 23:03:14.445511', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:14.530486', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.02282470464706421, 'timestamp': '2025-09-30 23:03:14.536775', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.603998', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.03400218486785889, 'timestamp': '2025-09-30 23:03:14.608376', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:14.670762', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.02335013821721077, 'timestamp': '2025-09-30 23:03:14.679280', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:14.741367', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.006206917576491833, 'timestamp': '2025-09-30 23:03:14.756378', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.818763', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.04239463061094284, 'timestamp': '2025-09-30 23:03:14.828123', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.893003', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.006650371942669153, 'timestamp': '2025-09-30 23:03:14.898228', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:14.956853', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.048059213906526566, 'timestamp': '2025-09-30 23:03:14.968069', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:03:19.668842', 'step': 1672, 'epoch': 1} {'type': 'pplx', 'content': 6874312.808507587, 'timestamp': '2025-09-30 23:03:19.674550', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:19.735407', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.04420149698853493, 'timestamp': '2025-09-30 23:03:19.739460', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:19.805675', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.03221263736486435, 'timestamp': '2025-09-30 23:03:19.810565', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:19.873029', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.008571487851440907, 'timestamp': '2025-09-30 23:03:19.877464', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:19.947249', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.031165186315774918, 'timestamp': '2025-09-30 23:03:19.954139', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.019788', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.02815958485007286, 'timestamp': '2025-09-30 23:03:20.034607', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.099018', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.04379555955529213, 'timestamp': '2025-09-30 23:03:20.108913', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.173217', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.025666074827313423, 'timestamp': '2025-09-30 23:03:20.176184', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:20.247108', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.037563350051641464, 'timestamp': '2025-09-30 23:03:20.256823', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.315806', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.02797895111143589, 'timestamp': '2025-09-30 23:03:20.320299', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:20.385513', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.02842310257256031, 'timestamp': '2025-09-30 23:03:20.393255', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.462702', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.01964518241584301, 'timestamp': '2025-09-30 23:03:20.466980', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.533129', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.007362630218267441, 'timestamp': '2025-09-30 23:03:20.540518', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.610694', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.03601453825831413, 'timestamp': '2025-09-30 23:03:20.613891', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.680771', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.0023656568955630064, 'timestamp': '2025-09-30 23:03:20.684831', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.758019', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.022723432630300522, 'timestamp': '2025-09-30 23:03:20.761756', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:20.821227', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.03244378790259361, 'timestamp': '2025-09-30 23:03:20.829128', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:20.899826', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.017519595101475716, 'timestamp': '2025-09-30 23:03:20.906323', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:20.983468', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.028876857832074165, 'timestamp': '2025-09-30 23:03:20.988305', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.072807', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.02844829298555851, 'timestamp': '2025-09-30 23:03:21.076476', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.137473', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.05375096946954727, 'timestamp': '2025-09-30 23:03:21.144247', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:21.214969', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.03547981381416321, 'timestamp': '2025-09-30 23:03:21.218008', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.280054', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.008050830103456974, 'timestamp': '2025-09-30 23:03:21.286349', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.344023', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.054097890853881836, 'timestamp': '2025-09-30 23:03:21.348879', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:21.424023', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.009306274354457855, 'timestamp': '2025-09-30 23:03:21.431673', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:21.516483', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.03626415878534317, 'timestamp': '2025-09-30 23:03:21.520150', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.586961', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.029139483347535133, 'timestamp': '2025-09-30 23:03:21.593555', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.662152', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.03576407581567764, 'timestamp': '2025-09-30 23:03:21.671813', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.736875', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.04700320214033127, 'timestamp': '2025-09-30 23:03:21.749192', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.826499', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.041361626237630844, 'timestamp': '2025-09-30 23:03:21.830966', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.888209', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.03155558183789253, 'timestamp': '2025-09-30 23:03:21.897371', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:21.966071', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.06961577385663986, 'timestamp': '2025-09-30 23:03:21.970066', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.025642', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.011782236397266388, 'timestamp': '2025-09-30 23:03:22.038391', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.102587', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.027972592040896416, 'timestamp': '2025-09-30 23:03:22.109655', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.180763', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.02786806784570217, 'timestamp': '2025-09-30 23:03:22.185647', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:22.254622', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.06349670141935349, 'timestamp': '2025-09-30 23:03:22.259792', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.332216', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.016465744003653526, 'timestamp': '2025-09-30 23:03:22.340257', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.405687', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.014937507919967175, 'timestamp': '2025-09-30 23:03:22.412871', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:22.481942', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.012120380066335201, 'timestamp': '2025-09-30 23:03:22.485662', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:22.545192', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.07412063330411911, 'timestamp': '2025-09-30 23:03:22.548941', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:22.607502', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.0035197508987039328, 'timestamp': '2025-09-30 23:03:22.616945', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:22.676941', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.014212304726243019, 'timestamp': '2025-09-30 23:03:22.679217', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.736830', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.03978709504008293, 'timestamp': '2025-09-30 23:03:22.740331', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.798737', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.04219522699713707, 'timestamp': '2025-09-30 23:03:22.803438', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:22.869221', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.008176630362868309, 'timestamp': '2025-09-30 23:03:22.879177', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:22.951939', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.032070983201265335, 'timestamp': '2025-09-30 23:03:22.962368', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.033302', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.0282155629247427, 'timestamp': '2025-09-30 23:03:23.045166', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.114589', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.05537924915552139, 'timestamp': '2025-09-30 23:03:23.122174', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.188119', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.05578388646245003, 'timestamp': '2025-09-30 23:03:23.198924', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.260835', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.029797006398439407, 'timestamp': '2025-09-30 23:03:23.265202', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:23.323311', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.03057539090514183, 'timestamp': '2025-09-30 23:03:23.326340', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.393554', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.04160817712545395, 'timestamp': '2025-09-30 23:03:23.408361', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.479368', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.019370047375559807, 'timestamp': '2025-09-30 23:03:23.486340', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.562448', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.015893610194325447, 'timestamp': '2025-09-30 23:03:23.572083', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:23.648949', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.03906954452395439, 'timestamp': '2025-09-30 23:03:23.658406', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:23.731706', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.00840983260422945, 'timestamp': '2025-09-30 23:03:23.737667', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.798843', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.0575944222509861, 'timestamp': '2025-09-30 23:03:23.811500', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.883996', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.02885087952017784, 'timestamp': '2025-09-30 23:03:23.888686', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:23.946420', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.03267459198832512, 'timestamp': '2025-09-30 23:03:23.950169', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:24.015447', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.02622651681303978, 'timestamp': '2025-09-30 23:03:24.023952', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.083689', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.03276686742901802, 'timestamp': '2025-09-30 23:03:24.092219', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:24.154657', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.014288174919784069, 'timestamp': '2025-09-30 23:03:24.159233', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:24.225057', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.008940544910728931, 'timestamp': '2025-09-30 23:03:24.229777', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.289827', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.02715335227549076, 'timestamp': '2025-09-30 23:03:24.296720', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.364929', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.04862169176340103, 'timestamp': '2025-09-30 23:03:24.372369', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:24.443733', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.0304782185703516, 'timestamp': '2025-09-30 23:03:24.448643', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.517298', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.05467914417386055, 'timestamp': '2025-09-30 23:03:24.521878', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.579622', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.03933584317564964, 'timestamp': '2025-09-30 23:03:24.583441', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.649325', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.009368711151182652, 'timestamp': '2025-09-30 23:03:24.656781', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:24.738891', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.04071250557899475, 'timestamp': '2025-09-30 23:03:24.749364', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.812382', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.02354205958545208, 'timestamp': '2025-09-30 23:03:24.820609', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.884193', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.018266336992383003, 'timestamp': '2025-09-30 23:03:24.889539', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:24.951487', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.031470514833927155, 'timestamp': '2025-09-30 23:03:24.973220', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.037775', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.004304593428969383, 'timestamp': '2025-09-30 23:03:25.041953', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.123621', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.024358902126550674, 'timestamp': '2025-09-30 23:03:25.139531', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.251822', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.053417522460222244, 'timestamp': '2025-09-30 23:03:25.255826', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.355469', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.03503561019897461, 'timestamp': '2025-09-30 23:03:25.364448', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.450504', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.010265844874083996, 'timestamp': '2025-09-30 23:03:25.455649', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.537144', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.03291346877813339, 'timestamp': '2025-09-30 23:03:25.541065', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.617205', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.014389291405677795, 'timestamp': '2025-09-30 23:03:25.624179', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:25.689356', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.03136178478598595, 'timestamp': '2025-09-30 23:03:25.697599', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.757141', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.022894904017448425, 'timestamp': '2025-09-30 23:03:25.761510', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.827017', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.032587215304374695, 'timestamp': '2025-09-30 23:03:25.830986', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.889165', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.03563065826892853, 'timestamp': '2025-09-30 23:03:25.892779', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:25.951330', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.005269157700240612, 'timestamp': '2025-09-30 23:03:25.963916', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.017518', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.043692853301763535, 'timestamp': '2025-09-30 23:03:26.021325', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.075753', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.025392908602952957, 'timestamp': '2025-09-30 23:03:26.078103', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:26.137018', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.046837229281663895, 'timestamp': '2025-09-30 23:03:26.140460', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.197658', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.008817966096103191, 'timestamp': '2025-09-30 23:03:26.203790', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.258900', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.022955697029829025, 'timestamp': '2025-09-30 23:03:26.263070', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.319321', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.029590735211968422, 'timestamp': '2025-09-30 23:03:26.321805', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.381631', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.017336755990982056, 'timestamp': '2025-09-30 23:03:26.385896', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.440447', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.01700284518301487, 'timestamp': '2025-09-30 23:03:26.447421', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.509547', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.02232586406171322, 'timestamp': '2025-09-30 23:03:26.512992', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:26.572115', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.012428165413439274, 'timestamp': '2025-09-30 23:03:26.577499', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.655267', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.020696517080068588, 'timestamp': '2025-09-30 23:03:26.667476', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.733019', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.019136471673846245, 'timestamp': '2025-09-30 23:03:26.747935', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.822539', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.018316080793738365, 'timestamp': '2025-09-30 23:03:26.833780', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.894585', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.028435377404093742, 'timestamp': '2025-09-30 23:03:26.910300', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:26.979524', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.042789239436388016, 'timestamp': '2025-09-30 23:03:26.994852', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.057318', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.013749071396887302, 'timestamp': '2025-09-30 23:03:27.075344', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.135183', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.03995902091264725, 'timestamp': '2025-09-30 23:03:27.142113', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.206028', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.04246503859758377, 'timestamp': '2025-09-30 23:03:27.210273', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.270305', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.02506271004676819, 'timestamp': '2025-09-30 23:03:27.279437', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:27.352994', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.022575344890356064, 'timestamp': '2025-09-30 23:03:27.360127', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.422283', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.009455171413719654, 'timestamp': '2025-09-30 23:03:27.427195', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.490409', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.011384283192455769, 'timestamp': '2025-09-30 23:03:27.495012', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.571383', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.03593213111162186, 'timestamp': '2025-09-30 23:03:27.575871', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.641541', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.0553128719329834, 'timestamp': '2025-09-30 23:03:27.653921', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.719633', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.007615168113261461, 'timestamp': '2025-09-30 23:03:27.727437', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:27.801492', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.02984880842268467, 'timestamp': '2025-09-30 23:03:27.808694', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.876959', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.034637488424777985, 'timestamp': '2025-09-30 23:03:27.883604', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:27.954878', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.024759409949183464, 'timestamp': '2025-09-30 23:03:27.963688', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:28.027778', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.005803500767797232, 'timestamp': '2025-09-30 23:03:28.033750', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:28.103157', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.02652922458946705, 'timestamp': '2025-09-30 23:03:28.106895', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.177100', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.0028733410872519016, 'timestamp': '2025-09-30 23:03:28.184224', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.253203', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.0044654980301856995, 'timestamp': '2025-09-30 23:03:28.262384', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.327522', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.0325225405395031, 'timestamp': '2025-09-30 23:03:28.335147', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:28.398600', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.029208486899733543, 'timestamp': '2025-09-30 23:03:28.411899', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.480976', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.01213490404188633, 'timestamp': '2025-09-30 23:03:28.488374', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:28.552098', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.018032917752861977, 'timestamp': '2025-09-30 23:03:28.562445', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.628495', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.02588854730129242, 'timestamp': '2025-09-30 23:03:28.637845', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.693395', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.022657349705696106, 'timestamp': '2025-09-30 23:03:28.696198', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.748904', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.008118325844407082, 'timestamp': '2025-09-30 23:03:28.751304', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.804174', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.03985556587576866, 'timestamp': '2025-09-30 23:03:28.809783', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.863171', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.04742357134819031, 'timestamp': '2025-09-30 23:03:28.865311', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:28.918655', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.020307477563619614, 'timestamp': '2025-09-30 23:03:28.920940', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:28.975625', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.0524475984275341, 'timestamp': '2025-09-30 23:03:28.978715', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.039594', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.014409645460546017, 'timestamp': '2025-09-30 23:03:29.045702', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.100792', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.03951900452375412, 'timestamp': '2025-09-30 23:03:29.103279', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.156632', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.008828689344227314, 'timestamp': '2025-09-30 23:03:29.162732', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.222312', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.024514975026249886, 'timestamp': '2025-09-30 23:03:29.229590', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:29.287172', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.015096467919647694, 'timestamp': '2025-09-30 23:03:29.293917', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:29.352308', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.01887637935578823, 'timestamp': '2025-09-30 23:03:29.357073', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.413606', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.021046550944447517, 'timestamp': '2025-09-30 23:03:29.418015', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.475109', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.011768442578613758, 'timestamp': '2025-09-30 23:03:29.478935', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.534005', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.09662292897701263, 'timestamp': '2025-09-30 23:03:29.539938', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.592563', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.0589679591357708, 'timestamp': '2025-09-30 23:03:29.595670', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.648946', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.008137254044413567, 'timestamp': '2025-09-30 23:03:29.651427', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.704377', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.05112248286604881, 'timestamp': '2025-09-30 23:03:29.706526', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.759878', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.037033822387456894, 'timestamp': '2025-09-30 23:03:29.765792', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.819687', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.025888176634907722, 'timestamp': '2025-09-30 23:03:29.822717', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.877467', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.08764994144439697, 'timestamp': '2025-09-30 23:03:29.879773', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.933365', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.1106322780251503, 'timestamp': '2025-09-30 23:03:29.936118', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:29.989887', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.010078015737235546, 'timestamp': '2025-09-30 23:03:29.995828', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:30.049416', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.021875860169529915, 'timestamp': '2025-09-30 23:03:30.052373', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:30.106590', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.010391230694949627, 'timestamp': '2025-09-30 23:03:30.109175', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:30.163465', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.013839521445333958, 'timestamp': '2025-09-30 23:03:30.166284', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:30.221851', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.03663128614425659, 'timestamp': '2025-09-30 23:03:30.227752', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:30.280965', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.060991909354925156, 'timestamp': '2025-09-30 23:03:30.283277', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:30.336937', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.011012178845703602, 'timestamp': '2025-09-30 23:03:30.341692', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:30.400462', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.008950588293373585, 'timestamp': '2025-09-30 23:03:30.409043', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:30.466814', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.028605198487639427, 'timestamp': '2025-09-30 23:03:30.474210', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:03:34.202886', 'step': 1824, 'epoch': 1} {'type': 'pplx', 'content': 6844545.916118688, 'timestamp': '2025-09-30 23:03:34.205276', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:34.257622', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.029215382412075996, 'timestamp': '2025-09-30 23:03:34.260434', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.313600', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.014803027734160423, 'timestamp': '2025-09-30 23:03:34.315970', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.370440', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.01966497302055359, 'timestamp': '2025-09-30 23:03:34.372995', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.441835', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.005669855047017336, 'timestamp': '2025-09-30 23:03:34.458367', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.530465', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.07399825006723404, 'timestamp': '2025-09-30 23:03:34.534924', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.595990', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.05087321251630783, 'timestamp': '2025-09-30 23:03:34.598723', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.654502', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.023847045376896858, 'timestamp': '2025-09-30 23:03:34.657169', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.710240', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.03420443832874298, 'timestamp': '2025-09-30 23:03:34.716776', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.771734', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.02555200271308422, 'timestamp': '2025-09-30 23:03:34.777131', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.830361', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.03044826351106167, 'timestamp': '2025-09-30 23:03:34.834524', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.894773', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.011559704318642616, 'timestamp': '2025-09-30 23:03:34.902212', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:34.969641', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.016390759497880936, 'timestamp': '2025-09-30 23:03:34.977491', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.039702', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.022480975836515427, 'timestamp': '2025-09-30 23:03:35.041950', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.094790', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.019724097102880478, 'timestamp': '2025-09-30 23:03:35.097192', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.151330', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.058690499514341354, 'timestamp': '2025-09-30 23:03:35.153717', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:35.208235', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.01862393319606781, 'timestamp': '2025-09-30 23:03:35.214105', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.270309', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.02954324148595333, 'timestamp': '2025-09-30 23:03:35.273227', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:35.326945', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.027276454493403435, 'timestamp': '2025-09-30 23:03:35.329126', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:35.382827', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.007999272085726261, 'timestamp': '2025-09-30 23:03:35.385135', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:35.442184', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.03706697002053261, 'timestamp': '2025-09-30 23:03:35.448576', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.502751', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.043660759925842285, 'timestamp': '2025-09-30 23:03:35.506195', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.565734', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.016251197084784508, 'timestamp': '2025-09-30 23:03:35.569894', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.627307', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.04578496888279915, 'timestamp': '2025-09-30 23:03:35.630311', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.685265', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.030103247612714767, 'timestamp': '2025-09-30 23:03:35.691868', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:35.745326', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.03982831910252571, 'timestamp': '2025-09-30 23:03:35.748252', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:35.804307', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.03667277470231056, 'timestamp': '2025-09-30 23:03:35.806589', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.861580', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.03912704437971115, 'timestamp': '2025-09-30 23:03:35.864815', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.922465', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.033648308366537094, 'timestamp': '2025-09-30 23:03:35.928998', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:35.987065', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.015525797381997108, 'timestamp': '2025-09-30 23:03:35.993915', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.060346', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.03959435969591141, 'timestamp': '2025-09-30 23:03:36.068091', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.135514', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.02674132026731968, 'timestamp': '2025-09-30 23:03:36.139628', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.212396', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.03977436199784279, 'timestamp': '2025-09-30 23:03:36.219298', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.279087', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.03875306248664856, 'timestamp': '2025-09-30 23:03:36.282217', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.338624', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.009456208907067776, 'timestamp': '2025-09-30 23:03:36.343688', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.402142', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.03257815167307854, 'timestamp': '2025-09-30 23:03:36.404717', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.459713', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.011994227766990662, 'timestamp': '2025-09-30 23:03:36.466711', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.521029', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.05342849716544151, 'timestamp': '2025-09-30 23:03:36.526025', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.582445', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.020704971626400948, 'timestamp': '2025-09-30 23:03:36.586242', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.643645', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.060935281217098236, 'timestamp': '2025-09-30 23:03:36.647432', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.703439', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.024150853976607323, 'timestamp': '2025-09-30 23:03:36.712990', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.768063', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.05228426307439804, 'timestamp': '2025-09-30 23:03:36.772482', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.825739', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.039144545793533325, 'timestamp': '2025-09-30 23:03:36.827857', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:36.882209', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.011336366645991802, 'timestamp': '2025-09-30 23:03:36.884614', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.938236', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.021397080272436142, 'timestamp': '2025-09-30 23:03:36.945196', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:36.999398', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.03581756353378296, 'timestamp': '2025-09-30 23:03:37.001833', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.062804', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.03817983716726303, 'timestamp': '2025-09-30 23:03:37.065771', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.124413', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.018746482208371162, 'timestamp': '2025-09-30 23:03:37.132465', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:37.193205', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.03618135303258896, 'timestamp': '2025-09-30 23:03:37.201528', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:37.265867', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.05515732988715172, 'timestamp': '2025-09-30 23:03:37.268608', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.324651', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.021562844514846802, 'timestamp': '2025-09-30 23:03:37.335918', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:37.392031', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.02444400079548359, 'timestamp': '2025-09-30 23:03:37.394388', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:37.447897', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.0204839576035738, 'timestamp': '2025-09-30 23:03:37.455353', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.512697', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.01437454391270876, 'timestamp': '2025-09-30 23:03:37.514989', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.573268', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.06275821477174759, 'timestamp': '2025-09-30 23:03:37.576007', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:37.633101', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.014627352356910706, 'timestamp': '2025-09-30 23:03:37.637857', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.693249', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.01776624657213688, 'timestamp': '2025-09-30 23:03:37.704600', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.756823', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.019725052639842033, 'timestamp': '2025-09-30 23:03:37.759449', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.813147', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.011432499624788761, 'timestamp': '2025-09-30 23:03:37.815144', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.868602', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.02104000747203827, 'timestamp': '2025-09-30 23:03:37.873300', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:37.931445', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.016873804852366447, 'timestamp': '2025-09-30 23:03:37.937715', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.000898', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.03955613449215889, 'timestamp': '2025-09-30 23:03:38.003609', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.059535', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.012357932515442371, 'timestamp': '2025-09-30 23:03:38.061876', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.116171', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.04336729273200035, 'timestamp': '2025-09-30 23:03:38.118533', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:38.173182', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.026215553283691406, 'timestamp': '2025-09-30 23:03:38.179041', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.235715', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.03261777013540268, 'timestamp': '2025-09-30 23:03:38.238140', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.292296', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.007995059713721275, 'timestamp': '2025-09-30 23:03:38.298630', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:38.360106', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.025382041931152344, 'timestamp': '2025-09-30 23:03:38.369532', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.443485', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.0378556065261364, 'timestamp': '2025-09-30 23:03:38.450394', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.503326', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.01311625074595213, 'timestamp': '2025-09-30 23:03:38.505721', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.560523', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.023856965824961662, 'timestamp': '2025-09-30 23:03:38.563321', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.616756', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.0090571204200387, 'timestamp': '2025-09-30 23:03:38.618722', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.672558', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.02310054562985897, 'timestamp': '2025-09-30 23:03:38.678043', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.745388', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.023289546370506287, 'timestamp': '2025-09-30 23:03:38.747959', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.803031', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.015966741368174553, 'timestamp': '2025-09-30 23:03:38.806005', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:38.859840', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.029893329367041588, 'timestamp': '2025-09-30 23:03:38.862799', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.916844', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.027994263917207718, 'timestamp': '2025-09-30 23:03:38.923081', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:38.975826', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.035891883075237274, 'timestamp': '2025-09-30 23:03:38.978375', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.032317', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.04275408759713173, 'timestamp': '2025-09-30 23:03:39.034612', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.087756', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.01093245018273592, 'timestamp': '2025-09-30 23:03:39.091010', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:39.143315', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.017983654513955116, 'timestamp': '2025-09-30 23:03:39.149467', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.202889', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.025035114958882332, 'timestamp': '2025-09-30 23:03:39.205662', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:39.259425', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.041090283542871475, 'timestamp': '2025-09-30 23:03:39.261727', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.315281', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.006545861717313528, 'timestamp': '2025-09-30 23:03:39.317750', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.371578', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.02441316656768322, 'timestamp': '2025-09-30 23:03:39.377636', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:39.432077', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.04455588012933731, 'timestamp': '2025-09-30 23:03:39.441608', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:39.506204', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.023499222472310066, 'timestamp': '2025-09-30 23:03:39.511097', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.579694', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.029459161683917046, 'timestamp': '2025-09-30 23:03:39.581769', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.636937', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.02016403339803219, 'timestamp': '2025-09-30 23:03:39.644640', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.700095', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.027036422863602638, 'timestamp': '2025-09-30 23:03:39.702201', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.755636', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.008368094451725483, 'timestamp': '2025-09-30 23:03:39.758038', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.813493', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.015152723528444767, 'timestamp': '2025-09-30 23:03:39.817095', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.875904', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.040227241814136505, 'timestamp': '2025-09-30 23:03:39.882700', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:39.934885', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.0204449649900198, 'timestamp': '2025-09-30 23:03:39.938246', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:39.995557', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.005960437469184399, 'timestamp': '2025-09-30 23:03:39.999035', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.053151', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.015630150213837624, 'timestamp': '2025-09-30 23:03:40.055718', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.109919', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.012236611917614937, 'timestamp': '2025-09-30 23:03:40.116137', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.169015', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.06850045919418335, 'timestamp': '2025-09-30 23:03:40.171599', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:40.225426', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.03593562915921211, 'timestamp': '2025-09-30 23:03:40.227859', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.281570', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.06695842742919922, 'timestamp': '2025-09-30 23:03:40.284390', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.338781', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.0753776952624321, 'timestamp': '2025-09-30 23:03:40.344754', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.397266', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.01597183756530285, 'timestamp': '2025-09-30 23:03:40.399521', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.453493', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.029036883264780045, 'timestamp': '2025-09-30 23:03:40.455833', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.508732', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.08011870831251144, 'timestamp': '2025-09-30 23:03:40.511461', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.566192', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.04809867963194847, 'timestamp': '2025-09-30 23:03:40.573506', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:40.631708', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.07656770199537277, 'timestamp': '2025-09-30 23:03:40.648295', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.708921', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.046716298907995224, 'timestamp': '2025-09-30 23:03:40.712092', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.768368', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.039750877767801285, 'timestamp': '2025-09-30 23:03:40.770758', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.824184', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.03977865353226662, 'timestamp': '2025-09-30 23:03:40.831920', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.886936', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.0069872597232460976, 'timestamp': '2025-09-30 23:03:40.895812', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:40.948493', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.038729455322027206, 'timestamp': '2025-09-30 23:03:40.950779', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.009788', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.03359084576368332, 'timestamp': '2025-09-30 23:03:41.012617', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.067158', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.09144307672977448, 'timestamp': '2025-09-30 23:03:41.072979', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.129178', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.0141996368765831, 'timestamp': '2025-09-30 23:03:41.133775', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.188435', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.024179305881261826, 'timestamp': '2025-09-30 23:03:41.195959', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.250979', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.06279733031988144, 'timestamp': '2025-09-30 23:03:41.253839', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.310274', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.04380693659186363, 'timestamp': '2025-09-30 23:03:41.316421', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:41.374967', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.007149507291615009, 'timestamp': '2025-09-30 23:03:41.378448', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:41.432253', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.05265026167035103, 'timestamp': '2025-09-30 23:03:41.437348', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.491091', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.017635921016335487, 'timestamp': '2025-09-30 23:03:41.493356', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:41.557227', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.017970101907849312, 'timestamp': '2025-09-30 23:03:41.573020', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:41.631500', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.035259880125522614, 'timestamp': '2025-09-30 23:03:41.634226', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:41.693177', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.013490930199623108, 'timestamp': '2025-09-30 23:03:41.700254', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.755792', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.021095823496580124, 'timestamp': '2025-09-30 23:03:41.758349', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.813989', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.044934339821338654, 'timestamp': '2025-09-30 23:03:41.820024', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:41.874483', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.013177521526813507, 'timestamp': '2025-09-30 23:03:41.883035', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:41.952127', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.023083774372935295, 'timestamp': '2025-09-30 23:03:41.954464', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:42.009197', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.05409770831465721, 'timestamp': '2025-09-30 23:03:42.011911', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:42.077034', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.02720683626830578, 'timestamp': '2025-09-30 23:03:42.085826', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.144288', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.018925445154309273, 'timestamp': '2025-09-30 23:03:42.147204', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.208387', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.026734214276075363, 'timestamp': '2025-09-30 23:03:42.210780', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:42.264680', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.04976202920079231, 'timestamp': '2025-09-30 23:03:42.267993', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.323364', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.0347769521176815, 'timestamp': '2025-09-30 23:03:42.329678', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:42.399255', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.03161564841866493, 'timestamp': '2025-09-30 23:03:42.401892', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.456940', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.07986608892679214, 'timestamp': '2025-09-30 23:03:42.471861', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:42.526208', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.013257990591228008, 'timestamp': '2025-09-30 23:03:42.528540', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.587577', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.04054868221282959, 'timestamp': '2025-09-30 23:03:42.594902', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.652326', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.04905771464109421, 'timestamp': '2025-09-30 23:03:42.656324', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.711631', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.016674740239977837, 'timestamp': '2025-09-30 23:03:42.714316', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.769399', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.034370072185993195, 'timestamp': '2025-09-30 23:03:42.772772', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:42.829310', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.023396670818328857, 'timestamp': '2025-09-30 23:03:42.844289', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.897003', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.02343648672103882, 'timestamp': '2025-09-30 23:03:42.900198', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:42.953821', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.055996786803007126, 'timestamp': '2025-09-30 23:03:42.957005', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.010642', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.023821784183382988, 'timestamp': '2025-09-30 23:03:43.012940', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.066098', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.02353341318666935, 'timestamp': '2025-09-30 23:03:43.072269', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.125446', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.032681290060281754, 'timestamp': '2025-09-30 23:03:43.128061', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.183674', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.0312732495367527, 'timestamp': '2025-09-30 23:03:43.189661', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.247600', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.019440200179815292, 'timestamp': '2025-09-30 23:03:43.251346', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.306499', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.029196863994002342, 'timestamp': '2025-09-30 23:03:43.323299', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.376120', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.047552257776260376, 'timestamp': '2025-09-30 23:03:43.378484', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.437893', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.027420785278081894, 'timestamp': '2025-09-30 23:03:43.445049', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.498340', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.03618454560637474, 'timestamp': '2025-09-30 23:03:43.500731', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:43.553105', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.03192310407757759, 'timestamp': '2025-09-30 23:03:43.559094', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:03:47.370688', 'step': 1976, 'epoch': 1} {'type': 'pplx', 'content': 5699249.856545136, 'timestamp': '2025-09-30 23:03:47.372957', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:47.426501', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.03999670594930649, 'timestamp': '2025-09-30 23:03:47.429089', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:47.483745', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.026708219200372696, 'timestamp': '2025-09-30 23:03:47.486044', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:47.540799', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.05714083090424538, 'timestamp': '2025-09-30 23:03:47.543137', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:47.596317', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.012188521213829517, 'timestamp': '2025-09-30 23:03:47.607439', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:47.660590', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.05697813257575035, 'timestamp': '2025-09-30 23:03:47.664902', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:47.718953', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.048869360238313675, 'timestamp': '2025-09-30 23:03:47.722038', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:47.776430', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.025077657774090767, 'timestamp': '2025-09-30 23:03:47.779813', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:47.833076', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.03274685516953468, 'timestamp': '2025-09-30 23:03:47.840189', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:47.906238', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.03334812819957733, 'timestamp': '2025-09-30 23:03:47.908496', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:47.962033', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.038076043128967285, 'timestamp': '2025-09-30 23:03:47.964820', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.018522', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.04197216406464577, 'timestamp': '2025-09-30 23:03:48.023593', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.077337', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.02517578937113285, 'timestamp': '2025-09-30 23:03:48.083713', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.137679', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.027687177062034607, 'timestamp': '2025-09-30 23:03:48.140740', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.195756', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.03410280868411064, 'timestamp': '2025-09-30 23:03:48.198206', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:48.251731', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.026531964540481567, 'timestamp': '2025-09-30 23:03:48.254613', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.308439', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.010890255682170391, 'timestamp': '2025-09-30 23:03:48.315175', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.367085', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.012522404082119465, 'timestamp': '2025-09-30 23:03:48.369109', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.422357', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.02335711382329464, 'timestamp': '2025-09-30 23:03:48.425596', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.478387', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.010581307113170624, 'timestamp': '2025-09-30 23:03:48.481546', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.533027', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.02089252881705761, 'timestamp': '2025-09-30 23:03:48.539307', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.590621', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.040130019187927246, 'timestamp': '2025-09-30 23:03:48.593946', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:48.647484', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.010590904392302036, 'timestamp': '2025-09-30 23:03:48.651503', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.705445', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.042553532868623734, 'timestamp': '2025-09-30 23:03:48.707834', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:48.765437', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.038362231105566025, 'timestamp': '2025-09-30 23:03:48.772767', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-30 23:03:49.182695', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:49.240111', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.03531741723418236, 'timestamp': '2025-09-30 23:03:49.242892', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:49.309419', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.01730552315711975, 'timestamp': '2025-09-30 23:03:49.312777', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:49.366488', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.02864089421927929, 'timestamp': '2025-09-30 23:03:49.369223', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:49.424410', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.05157962813973427, 'timestamp': '2025-09-30 23:03:49.431224', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:49.484559', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.04491826891899109, 'timestamp': '2025-09-30 23:03:49.487513', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:49.542831', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.03716299682855606, 'timestamp': '2025-09-30 23:03:49.546460', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:49.602624', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.02766716480255127, 'timestamp': '2025-09-30 23:03:49.605032', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:49.659469', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.024677423760294914, 'timestamp': '2025-09-30 23:03:49.665567', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:49.719947', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.01764793135225773, 'timestamp': '2025-09-30 23:03:49.722734', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:49.777414', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.024941762909293175, 'timestamp': '2025-09-30 23:03:49.779830', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:49.834316', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.037777017802000046, 'timestamp': '2025-09-30 23:03:49.847810', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:49.904884', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.028316548094153404, 'timestamp': '2025-09-30 23:03:49.912929', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:49.967543', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.02853056788444519, 'timestamp': '2025-09-30 23:03:49.969859', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:50.024922', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.06701947748661041, 'timestamp': '2025-09-30 23:03:50.029186', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.086550', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.0032052304595708847, 'timestamp': '2025-09-30 23:03:50.088755', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.142020', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.013747108168900013, 'timestamp': '2025-09-30 23:03:50.148762', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.202812', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.03917836397886276, 'timestamp': '2025-09-30 23:03:50.205462', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.258215', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.013695626519620419, 'timestamp': '2025-09-30 23:03:50.260921', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.314674', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.023288708180189133, 'timestamp': '2025-09-30 23:03:50.317277', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.370559', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.03267846256494522, 'timestamp': '2025-09-30 23:03:50.378418', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.431614', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.024368468672037125, 'timestamp': '2025-09-30 23:03:50.433760', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.488318', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.04333638772368431, 'timestamp': '2025-09-30 23:03:50.491635', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.546723', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.04538843035697937, 'timestamp': '2025-09-30 23:03:50.549008', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.603608', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.08297090977430344, 'timestamp': '2025-09-30 23:03:50.610163', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.666748', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.011968552134931087, 'timestamp': '2025-09-30 23:03:50.668841', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.722107', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.00495560746639967, 'timestamp': '2025-09-30 23:03:50.725729', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.779036', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.013399242423474789, 'timestamp': '2025-09-30 23:03:50.782078', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.843600', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.056431472301483154, 'timestamp': '2025-09-30 23:03:50.850013', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.903039', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.03620040416717529, 'timestamp': '2025-09-30 23:03:50.905933', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:50.964324', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.005144848022609949, 'timestamp': '2025-09-30 23:03:50.966925', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.021042', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.0673978179693222, 'timestamp': '2025-09-30 23:03:51.026168', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:51.086057', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.01902267336845398, 'timestamp': '2025-09-30 23:03:51.094053', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.145870', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.012531318701803684, 'timestamp': '2025-09-30 23:03:51.149527', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.201864', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.02538827247917652, 'timestamp': '2025-09-30 23:03:51.203871', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.256771', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.023673536255955696, 'timestamp': '2025-09-30 23:03:51.263025', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.321468', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.02763092890381813, 'timestamp': '2025-09-30 23:03:51.327919', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.379585', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.016662003472447395, 'timestamp': '2025-09-30 23:03:51.381914', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:51.439298', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.008093689568340778, 'timestamp': '2025-09-30 23:03:51.445858', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.518252', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.014605613425374031, 'timestamp': '2025-09-30 23:03:51.521547', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.574774', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.006180983502417803, 'timestamp': '2025-09-30 23:03:51.582024', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:51.636618', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.04361100122332573, 'timestamp': '2025-09-30 23:03:51.642438', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.701579', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.04096632078289986, 'timestamp': '2025-09-30 23:03:51.713986', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.779932', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.027210870757699013, 'timestamp': '2025-09-30 23:03:51.781713', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:51.841905', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.004255576524883509, 'timestamp': '2025-09-30 23:03:51.857631', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.910591', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.03090512380003929, 'timestamp': '2025-09-30 23:03:51.913144', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:51.967151', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.020715123042464256, 'timestamp': '2025-09-30 23:03:51.976703', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.048349', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.015611001290380955, 'timestamp': '2025-09-30 23:03:52.050866', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.104930', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.02239675633609295, 'timestamp': '2025-09-30 23:03:52.111402', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.165132', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.027883470058441162, 'timestamp': '2025-09-30 23:03:52.169351', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.226140', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.024848734959959984, 'timestamp': '2025-09-30 23:03:52.230248', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.287489', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.07970862835645676, 'timestamp': '2025-09-30 23:03:52.289667', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.349280', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.009356423281133175, 'timestamp': '2025-09-30 23:03:52.355595', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:52.407894', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.032419778406620026, 'timestamp': '2025-09-30 23:03:52.410910', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:52.469306', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.0342586524784565, 'timestamp': '2025-09-30 23:03:52.474928', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:52.529141', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.0052504995837807655, 'timestamp': '2025-09-30 23:03:52.531951', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.584712', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.02062901295721531, 'timestamp': '2025-09-30 23:03:52.590515', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:52.648784', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.025157557800412178, 'timestamp': '2025-09-30 23:03:52.651169', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.705919', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.03745177760720253, 'timestamp': '2025-09-30 23:03:52.708126', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:52.767303', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.004280290566384792, 'timestamp': '2025-09-30 23:03:52.782812', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.842511', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.028366591781377792, 'timestamp': '2025-09-30 23:03:52.850676', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:52.905258', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.009577122516930103, 'timestamp': '2025-09-30 23:03:52.915286', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:52.967283', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.015253974124789238, 'timestamp': '2025-09-30 23:03:52.969593', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:53.022698', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.017293650656938553, 'timestamp': '2025-09-30 23:03:53.025354', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:53.086823', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.03940075635910034, 'timestamp': '2025-09-30 23:03:53.093002', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.147650', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.012276643887162209, 'timestamp': '2025-09-30 23:03:53.150764', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.203195', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.02922808937728405, 'timestamp': '2025-09-30 23:03:53.206605', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.260855', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.026554586365818977, 'timestamp': '2025-09-30 23:03:53.263158', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.316821', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.02523881010711193, 'timestamp': '2025-09-30 23:03:53.323438', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.375938', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.027689754962921143, 'timestamp': '2025-09-30 23:03:53.379136', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:03:53.431808', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.018460629507899284, 'timestamp': '2025-09-30 23:03:53.433901', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.489054', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.01141655258834362, 'timestamp': '2025-09-30 23:03:53.491210', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.544971', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.029298752546310425, 'timestamp': '2025-09-30 23:03:53.551317', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.604651', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.07398209720849991, 'timestamp': '2025-09-30 23:03:53.606911', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.660982', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.022639868780970573, 'timestamp': '2025-09-30 23:03:53.664636', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:53.718251', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.007834055460989475, 'timestamp': '2025-09-30 23:03:53.721104', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.778661', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.03582429513335228, 'timestamp': '2025-09-30 23:03:53.788028', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.845906', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.021192163228988647, 'timestamp': '2025-09-30 23:03:53.849819', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.906560', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.038603901863098145, 'timestamp': '2025-09-30 23:03:53.910752', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:53.968646', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.0028774160891771317, 'timestamp': '2025-09-30 23:03:53.972128', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:54.027147', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.05917197838425636, 'timestamp': '2025-09-30 23:03:54.034039', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.091897', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.06105329841375351, 'timestamp': '2025-09-30 23:03:54.094130', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.150766', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.017111511901021004, 'timestamp': '2025-09-30 23:03:54.154469', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.210386', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.06000030040740967, 'timestamp': '2025-09-30 23:03:54.213251', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.269003', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.019093574956059456, 'timestamp': '2025-09-30 23:03:54.276065', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:54.330259', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.01072727795690298, 'timestamp': '2025-09-30 23:03:54.333248', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.390027', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.011648053303360939, 'timestamp': '2025-09-30 23:03:54.394577', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.449315', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.04122024402022362, 'timestamp': '2025-09-30 23:03:54.454213', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:54.513487', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.019395768642425537, 'timestamp': '2025-09-30 23:03:54.520868', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:54.576545', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.035379376262426376, 'timestamp': '2025-09-30 23:03:54.579450', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.639129', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.02785179577767849, 'timestamp': '2025-09-30 23:03:54.643323', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.703227', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.02458062395453453, 'timestamp': '2025-09-30 23:03:54.705840', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.762217', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.017901793122291565, 'timestamp': '2025-09-30 23:03:54.772372', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:54.826685', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.03511064872145653, 'timestamp': '2025-09-30 23:03:54.829582', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:54.886218', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.03559527173638344, 'timestamp': '2025-09-30 23:03:54.888975', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:54.948604', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.042562682181596756, 'timestamp': '2025-09-30 23:03:54.952693', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.007389', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.034028273075819016, 'timestamp': '2025-09-30 23:03:55.014144', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:55.068839', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.010145534761250019, 'timestamp': '2025-09-30 23:03:55.073695', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.128779', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.05091949179768562, 'timestamp': '2025-09-30 23:03:55.132827', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.190314', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.019774217158555984, 'timestamp': '2025-09-30 23:03:55.194074', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.253350', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.007970406673848629, 'timestamp': '2025-09-30 23:03:55.260050', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.314209', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.014042118564248085, 'timestamp': '2025-09-30 23:03:55.318370', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.374616', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.020926639437675476, 'timestamp': '2025-09-30 23:03:55.377522', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:55.435521', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.05572899803519249, 'timestamp': '2025-09-30 23:03:55.439378', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:55.494014', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.022733459249138832, 'timestamp': '2025-09-30 23:03:55.499801', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.555136', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.02707267366349697, 'timestamp': '2025-09-30 23:03:55.557291', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.612599', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.027768058702349663, 'timestamp': '2025-09-30 23:03:55.615458', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.671643', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.04154873639345169, 'timestamp': '2025-09-30 23:03:55.673796', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:03:55.727875', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.018399732187390327, 'timestamp': '2025-09-30 23:03:55.733926', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.797035', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.04965203255414963, 'timestamp': '2025-09-30 23:03:55.800462', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.852416', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.035484395921230316, 'timestamp': '2025-09-30 23:03:55.854526', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.907891', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.026115676388144493, 'timestamp': '2025-09-30 23:03:55.910360', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:55.965894', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.04350583627820015, 'timestamp': '2025-09-30 23:03:55.971559', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:56.035985', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.015051382593810558, 'timestamp': '2025-09-30 23:03:56.042974', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.104217', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.011712937615811825, 'timestamp': '2025-09-30 23:03:56.111719', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.171291', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.025119688361883163, 'timestamp': '2025-09-30 23:03:56.173781', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.227607', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.04061850905418396, 'timestamp': '2025-09-30 23:03:56.233467', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.285645', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.004628745373338461, 'timestamp': '2025-09-30 23:03:56.289823', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:56.347408', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.014587272889912128, 'timestamp': '2025-09-30 23:03:56.350701', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.404147', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.03633318468928337, 'timestamp': '2025-09-30 23:03:56.406845', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.460414', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.0433448888361454, 'timestamp': '2025-09-30 23:03:56.466197', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:03:56.521141', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.02989795431494713, 'timestamp': '2025-09-30 23:03:56.523256', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.577868', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.018848398700356483, 'timestamp': '2025-09-30 23:03:56.580417', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.633587', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.049914807081222534, 'timestamp': '2025-09-30 23:03:56.635628', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.687570', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.01873122714459896, 'timestamp': '2025-09-30 23:03:56.693470', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.754710', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.010873228311538696, 'timestamp': '2025-09-30 23:03:56.756716', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.810928', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.006760451011359692, 'timestamp': '2025-09-30 23:03:56.813700', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.867412', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.011765102855861187, 'timestamp': '2025-09-30 23:03:56.884392', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:03:56.966120', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.015564651228487492, 'timestamp': '2025-09-30 23:03:56.980469', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:04:01.875309', 'step': 2128, 'epoch': 1} {'type': 'pplx', 'content': 6077836.550173923, 'timestamp': '2025-09-30 23:04:01.886163', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:01.956857', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.07137339562177658, 'timestamp': '2025-09-30 23:04:01.960949', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.028517', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.010942378081381321, 'timestamp': '2025-09-30 23:04:02.033350', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.091921', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.00922134518623352, 'timestamp': '2025-09-30 23:04:02.094907', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:02.150154', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.04563644528388977, 'timestamp': '2025-09-30 23:04:02.156667', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.209411', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.029743028804659843, 'timestamp': '2025-09-30 23:04:02.211699', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:02.267001', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.017551489174365997, 'timestamp': '2025-09-30 23:04:02.269318', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.323482', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.03597820922732353, 'timestamp': '2025-09-30 23:04:02.326698', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:02.382421', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.0165814608335495, 'timestamp': '2025-09-30 23:04:02.388666', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.441962', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.004107656888663769, 'timestamp': '2025-09-30 23:04:02.444311', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.498400', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.011941658332943916, 'timestamp': '2025-09-30 23:04:02.500725', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.553661', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.014822403900325298, 'timestamp': '2025-09-30 23:04:02.567533', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.621058', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.042595960199832916, 'timestamp': '2025-09-30 23:04:02.627294', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.681354', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.04761670157313347, 'timestamp': '2025-09-30 23:04:02.684951', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.737902', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.01430211216211319, 'timestamp': '2025-09-30 23:04:02.752332', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:02.812254', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.01043104100972414, 'timestamp': '2025-09-30 23:04:02.815017', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.874596', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.012417756021022797, 'timestamp': '2025-09-30 23:04:02.887850', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:02.942160', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.010059857740998268, 'timestamp': '2025-09-30 23:04:02.944713', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.004661', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.01295026857405901, 'timestamp': '2025-09-30 23:04:03.007965', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.067366', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.03601127490401268, 'timestamp': '2025-09-30 23:04:03.070658', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.128903', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.01998223178088665, 'timestamp': '2025-09-30 23:04:03.135941', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.192576', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.032508667558431625, 'timestamp': '2025-09-30 23:04:03.195202', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.259249', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.027125496417284012, 'timestamp': '2025-09-30 23:04:03.262094', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:03.318779', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.04170433431863785, 'timestamp': '2025-09-30 23:04:03.321737', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.378076', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.023523647338151932, 'timestamp': '2025-09-30 23:04:03.385493', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.441248', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.010684513486921787, 'timestamp': '2025-09-30 23:04:03.444932', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.502202', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.048522528260946274, 'timestamp': '2025-09-30 23:04:03.504303', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.564715', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.09616922587156296, 'timestamp': '2025-09-30 23:04:03.568739', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.626066', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.039134953171014786, 'timestamp': '2025-09-30 23:04:03.633087', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.689278', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.03428240120410919, 'timestamp': '2025-09-30 23:04:03.692074', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.748428', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.019369255751371384, 'timestamp': '2025-09-30 23:04:03.753427', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.815485', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.04669308289885521, 'timestamp': '2025-09-30 23:04:03.818578', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.877864', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.008504089899361134, 'timestamp': '2025-09-30 23:04:03.888036', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:03.945603', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.03183968737721443, 'timestamp': '2025-09-30 23:04:03.949871', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.006810', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.023601695895195007, 'timestamp': '2025-09-30 23:04:04.009625', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.063726', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.016928790137171745, 'timestamp': '2025-09-30 23:04:04.066116', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.119601', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.0034288896713405848, 'timestamp': '2025-09-30 23:04:04.125843', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.179813', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.03065258264541626, 'timestamp': '2025-09-30 23:04:04.182275', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.235948', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.0436113104224205, 'timestamp': '2025-09-30 23:04:04.238227', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.292057', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.03052457980811596, 'timestamp': '2025-09-30 23:04:04.294393', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.348096', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.009145120158791542, 'timestamp': '2025-09-30 23:04:04.353603', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.405960', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.05303872749209404, 'timestamp': '2025-09-30 23:04:04.409492', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.463388', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.02528241276741028, 'timestamp': '2025-09-30 23:04:04.467237', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.522316', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.010056803934276104, 'timestamp': '2025-09-30 23:04:04.524613', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.577477', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.025723382830619812, 'timestamp': '2025-09-30 23:04:04.583250', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.635712', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.010061271488666534, 'timestamp': '2025-09-30 23:04:04.639047', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:04.693161', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.01717689260840416, 'timestamp': '2025-09-30 23:04:04.695430', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.748432', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.00980756338685751, 'timestamp': '2025-09-30 23:04:04.753261', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.812538', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.027607565745711327, 'timestamp': '2025-09-30 23:04:04.818278', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:04.874597', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.0285017192363739, 'timestamp': '2025-09-30 23:04:04.876843', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.932721', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.018007148057222366, 'timestamp': '2025-09-30 23:04:04.935598', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:04.988824', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.023340344429016113, 'timestamp': '2025-09-30 23:04:04.992275', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.045415', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.007179324049502611, 'timestamp': '2025-09-30 23:04:05.051797', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.105126', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.017195003107190132, 'timestamp': '2025-09-30 23:04:05.107149', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.161248', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.10469655692577362, 'timestamp': '2025-09-30 23:04:05.164362', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:05.228002', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.04805386811494827, 'timestamp': '2025-09-30 23:04:05.230475', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.283504', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.01582690142095089, 'timestamp': '2025-09-30 23:04:05.289444', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.343223', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.039215534925460815, 'timestamp': '2025-09-30 23:04:05.356885', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.412332', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.028453361243009567, 'timestamp': '2025-09-30 23:04:05.414797', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:05.471876', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.017935076728463173, 'timestamp': '2025-09-30 23:04:05.474052', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.541306', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.04932447895407677, 'timestamp': '2025-09-30 23:04:05.547710', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.602226', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.023144643753767014, 'timestamp': '2025-09-30 23:04:05.604630', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.658706', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.010017909109592438, 'timestamp': '2025-09-30 23:04:05.660863', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.716236', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.008838661946356297, 'timestamp': '2025-09-30 23:04:05.720050', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.783348', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.0242274422198534, 'timestamp': '2025-09-30 23:04:05.792135', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.855593', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.012548894621431828, 'timestamp': '2025-09-30 23:04:05.858052', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.917194', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.03010389767587185, 'timestamp': '2025-09-30 23:04:05.919824', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:05.976020', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.0698883980512619, 'timestamp': '2025-09-30 23:04:05.978493', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:06.046959', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.035208810120821, 'timestamp': '2025-09-30 23:04:06.054774', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:06.109931', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.04375612363219261, 'timestamp': '2025-09-30 23:04:06.113846', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:06.169699', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.015401246026158333, 'timestamp': '2025-09-30 23:04:06.173027', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.227925', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.01589675061404705, 'timestamp': '2025-09-30 23:04:06.230262', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.284301', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.05999916419386864, 'timestamp': '2025-09-30 23:04:06.291135', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.346699', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.04296047240495682, 'timestamp': '2025-09-30 23:04:06.349024', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.404293', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.039937347173690796, 'timestamp': '2025-09-30 23:04:06.406734', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.462002', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.030331537127494812, 'timestamp': '2025-09-30 23:04:06.464983', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:06.523471', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.023812483996152878, 'timestamp': '2025-09-30 23:04:06.531227', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.585977', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.06122920289635658, 'timestamp': '2025-09-30 23:04:06.588342', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.645463', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.07968456298112869, 'timestamp': '2025-09-30 23:04:06.647971', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.705181', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.008634527213871479, 'timestamp': '2025-09-30 23:04:06.708162', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:06.764228', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.02150072529911995, 'timestamp': '2025-09-30 23:04:06.773385', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:06.835713', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.04794096574187279, 'timestamp': '2025-09-30 23:04:06.841714', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.900590', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.02451067790389061, 'timestamp': '2025-09-30 23:04:06.911224', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:06.981984', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.016657883301377296, 'timestamp': '2025-09-30 23:04:06.984017', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.042381', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.014601719565689564, 'timestamp': '2025-09-30 23:04:07.050949', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.105664', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.044326163828372955, 'timestamp': '2025-09-30 23:04:07.108740', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.162004', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.016769956797361374, 'timestamp': '2025-09-30 23:04:07.164299', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.217169', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.03106210008263588, 'timestamp': '2025-09-30 23:04:07.219350', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.274538', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.06148376315832138, 'timestamp': '2025-09-30 23:04:07.282311', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:07.341310', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.011087465099990368, 'timestamp': '2025-09-30 23:04:07.344053', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.403930', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.021607886999845505, 'timestamp': '2025-09-30 23:04:07.407396', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:07.467312', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.022698845714330673, 'timestamp': '2025-09-30 23:04:07.469803', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:07.530226', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.006356710102409124, 'timestamp': '2025-09-30 23:04:07.536930', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.589429', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.02493828907608986, 'timestamp': '2025-09-30 23:04:07.591960', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.647447', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.013796581886708736, 'timestamp': '2025-09-30 23:04:07.649859', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.702755', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.014327640645205975, 'timestamp': '2025-09-30 23:04:07.706439', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.760156', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.016310174018144608, 'timestamp': '2025-09-30 23:04:07.767728', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.822177', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.024448353797197342, 'timestamp': '2025-09-30 23:04:07.827173', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.885898', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.03397948667407036, 'timestamp': '2025-09-30 23:04:07.888140', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.942868', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.030120551586151123, 'timestamp': '2025-09-30 23:04:07.945413', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:07.998669', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.05222606286406517, 'timestamp': '2025-09-30 23:04:08.005580', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.060271', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.03320906311273575, 'timestamp': '2025-09-30 23:04:08.063123', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.116587', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.011759418062865734, 'timestamp': '2025-09-30 23:04:08.120734', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.175206', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.02419036626815796, 'timestamp': '2025-09-30 23:04:08.177614', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.231128', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.009600259363651276, 'timestamp': '2025-09-30 23:04:08.237574', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.289582', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.044076479971408844, 'timestamp': '2025-09-30 23:04:08.291636', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.345352', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.02893388830125332, 'timestamp': '2025-09-30 23:04:08.348166', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.401582', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.02047020010650158, 'timestamp': '2025-09-30 23:04:08.404418', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.457781', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.04355967044830322, 'timestamp': '2025-09-30 23:04:08.463899', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.516826', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.026157349348068237, 'timestamp': '2025-09-30 23:04:08.519586', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.572100', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.03165573254227638, 'timestamp': '2025-09-30 23:04:08.574644', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.628562', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.03671281412243843, 'timestamp': '2025-09-30 23:04:08.631151', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.689343', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.011579914949834347, 'timestamp': '2025-09-30 23:04:08.695062', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.752617', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.025964384898543358, 'timestamp': '2025-09-30 23:04:08.754763', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.808472', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.02353760413825512, 'timestamp': '2025-09-30 23:04:08.812250', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.868719', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.03250106796622276, 'timestamp': '2025-09-30 23:04:08.871401', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.927408', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.031161967664957047, 'timestamp': '2025-09-30 23:04:08.933029', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:08.984888', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.019775236025452614, 'timestamp': '2025-09-30 23:04:08.987770', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.040399', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.05697902664542198, 'timestamp': '2025-09-30 23:04:09.042769', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.095855', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.05341336876153946, 'timestamp': '2025-09-30 23:04:09.100463', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.155197', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.023295775055885315, 'timestamp': '2025-09-30 23:04:09.161611', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.213438', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.02060202695429325, 'timestamp': '2025-09-30 23:04:09.215588', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:09.271434', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.005905879195779562, 'timestamp': '2025-09-30 23:04:09.273598', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.326716', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.025679513812065125, 'timestamp': '2025-09-30 23:04:09.328774', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:09.383188', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.00821367371827364, 'timestamp': '2025-09-30 23:04:09.390518', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.448420', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.0482073649764061, 'timestamp': '2025-09-30 23:04:09.451422', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.505534', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.0413622222840786, 'timestamp': '2025-09-30 23:04:09.508018', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.562101', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.007507979869842529, 'timestamp': '2025-09-30 23:04:09.564717', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:09.618624', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.061744604259729385, 'timestamp': '2025-09-30 23:04:09.624476', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:09.676822', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.010257468558847904, 'timestamp': '2025-09-30 23:04:09.679127', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.732780', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.07054788619279861, 'timestamp': '2025-09-30 23:04:09.735009', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:09.788348', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.021863142028450966, 'timestamp': '2025-09-30 23:04:09.793338', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:09.851384', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.01289987750351429, 'timestamp': '2025-09-30 23:04:09.858009', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.911120', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.026750443503260612, 'timestamp': '2025-09-30 23:04:09.913604', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:09.968732', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.05262468382716179, 'timestamp': '2025-09-30 23:04:09.972315', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.027012', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.013748393394052982, 'timestamp': '2025-09-30 23:04:10.029221', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:10.081679', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.041542600840330124, 'timestamp': '2025-09-30 23:04:10.087488', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:10.141159', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.026223475113511086, 'timestamp': '2025-09-30 23:04:10.143502', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.196932', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.06600312888622284, 'timestamp': '2025-09-30 23:04:10.200714', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:10.255649', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.040492165833711624, 'timestamp': '2025-09-30 23:04:10.258617', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.312377', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.025491755455732346, 'timestamp': '2025-09-30 23:04:10.318444', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.374278', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.040831781923770905, 'timestamp': '2025-09-30 23:04:10.376542', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.429722', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.058168794959783554, 'timestamp': '2025-09-30 23:04:10.432736', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.488612', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.009389401413500309, 'timestamp': '2025-09-30 23:04:10.492786', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.551812', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.04132861644029617, 'timestamp': '2025-09-30 23:04:10.559041', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.616341', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.0375480130314827, 'timestamp': '2025-09-30 23:04:10.619220', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:10.685935', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.013141453266143799, 'timestamp': '2025-09-30 23:04:10.689152', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:10.743769', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.022847244516015053, 'timestamp': '2025-09-30 23:04:10.749123', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.804723', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.018866252154111862, 'timestamp': '2025-09-30 23:04:10.812863', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.878906', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.03471839800477028, 'timestamp': '2025-09-30 23:04:10.883011', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:10.938333', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.03147469460964203, 'timestamp': '2025-09-30 23:04:10.942807', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:11.001752', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.004248549696058035, 'timestamp': '2025-09-30 23:04:11.005951', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:11.061499', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.03609532117843628, 'timestamp': '2025-09-30 23:04:11.068438', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:04:15.875733', 'step': 2280, 'epoch': 1} {'type': 'pplx', 'content': 7085985.040827792, 'timestamp': '2025-09-30 23:04:15.883495', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:15.948754', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.04285815358161926, 'timestamp': '2025-09-30 23:04:15.958008', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.028955', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.019755635410547256, 'timestamp': '2025-09-30 23:04:16.035029', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.106205', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.007292157970368862, 'timestamp': '2025-09-30 23:04:16.114008', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.190755', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.05038648843765259, 'timestamp': '2025-09-30 23:04:16.204323', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.269554', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.014915583655238152, 'timestamp': '2025-09-30 23:04:16.273100', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.335172', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.014663994312286377, 'timestamp': '2025-09-30 23:04:16.338714', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.403242', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.03256000205874443, 'timestamp': '2025-09-30 23:04:16.407982', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.483007', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.011335964314639568, 'timestamp': '2025-09-30 23:04:16.494669', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:16.574446', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.02676028572022915, 'timestamp': '2025-09-30 23:04:16.583422', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:16.654524', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.055934783071279526, 'timestamp': '2025-09-30 23:04:16.661255', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.728380', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.016275430098176003, 'timestamp': '2025-09-30 23:04:16.737939', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:04:16.815162', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.015326060354709625, 'timestamp': '2025-09-30 23:04:16.829337', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.908331', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.00134446716401726, 'timestamp': '2025-09-30 23:04:16.918320', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:16.990633', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.03874998167157173, 'timestamp': '2025-09-30 23:04:16.998906', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.063421', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.04353959113359451, 'timestamp': '2025-09-30 23:04:17.068284', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.134193', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.01875356212258339, 'timestamp': '2025-09-30 23:04:17.140735', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.208882', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.02006847783923149, 'timestamp': '2025-09-30 23:04:17.214149', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.302137', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.019083525985479355, 'timestamp': '2025-09-30 23:04:17.313246', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:17.383586', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.017113318666815758, 'timestamp': '2025-09-30 23:04:17.387449', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.464953', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.021967167034745216, 'timestamp': '2025-09-30 23:04:17.475998', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.546571', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.018685800954699516, 'timestamp': '2025-09-30 23:04:17.556731', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.631148', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.0066076056100428104, 'timestamp': '2025-09-30 23:04:17.634524', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.699747', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.04802115634083748, 'timestamp': '2025-09-30 23:04:17.705829', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.779505', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.05348120257258415, 'timestamp': '2025-09-30 23:04:17.794447', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.857523', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.03936406597495079, 'timestamp': '2025-09-30 23:04:17.860946', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:17.923632', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.007284701801836491, 'timestamp': '2025-09-30 23:04:17.928202', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.002220', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.0071279192343354225, 'timestamp': '2025-09-30 23:04:18.010096', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.082125', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.01102528627961874, 'timestamp': '2025-09-30 23:04:18.096373', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.163904', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.025731071829795837, 'timestamp': '2025-09-30 23:04:18.168618', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.230887', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.04201837256550789, 'timestamp': '2025-09-30 23:04:18.233402', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.305762', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.03581332042813301, 'timestamp': '2025-09-30 23:04:18.312211', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.382272', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.020482029765844345, 'timestamp': '2025-09-30 23:04:18.393630', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:18.466275', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.04863416403532028, 'timestamp': '2025-09-30 23:04:18.472060', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.537418', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.05183255672454834, 'timestamp': '2025-09-30 23:04:18.546309', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.615735', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.006119648460298777, 'timestamp': '2025-09-30 23:04:18.622291', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:18.692294', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.06490863859653473, 'timestamp': '2025-09-30 23:04:18.702229', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.771694', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.02769101969897747, 'timestamp': '2025-09-30 23:04:18.779646', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.847765', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.001932455925270915, 'timestamp': '2025-09-30 23:04:18.850767', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:18.915894', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.030967725440859795, 'timestamp': '2025-09-30 23:04:18.927212', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:18.993495', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.03372972831130028, 'timestamp': '2025-09-30 23:04:19.000030', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.070796', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.009269960224628448, 'timestamp': '2025-09-30 23:04:19.076469', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.136781', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.03429850563406944, 'timestamp': '2025-09-30 23:04:19.139889', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.211529', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.016517682000994682, 'timestamp': '2025-09-30 23:04:19.216216', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.279180', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.04285649582743645, 'timestamp': '2025-09-30 23:04:19.292219', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.356677', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.010094752535223961, 'timestamp': '2025-09-30 23:04:19.362730', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.431638', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.038308896124362946, 'timestamp': '2025-09-30 23:04:19.435938', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.518612', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.022600948810577393, 'timestamp': '2025-09-30 23:04:19.528819', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.594370', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.015502999536693096, 'timestamp': '2025-09-30 23:04:19.603469', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.676522', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.003818277968093753, 'timestamp': '2025-09-30 23:04:19.683681', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.750913', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.01862119697034359, 'timestamp': '2025-09-30 23:04:19.757588', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:19.824503', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.048953089863061905, 'timestamp': '2025-09-30 23:04:19.834564', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:19.910605', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.021246755495667458, 'timestamp': '2025-09-30 23:04:19.917546', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.007634', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.00580413406714797, 'timestamp': '2025-09-30 23:04:20.014893', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.083143', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.04230070486664772, 'timestamp': '2025-09-30 23:04:20.092801', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:20.164343', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.03262932971119881, 'timestamp': '2025-09-30 23:04:20.173211', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.248107', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.06394227594137192, 'timestamp': '2025-09-30 23:04:20.255118', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.325897', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.027994250878691673, 'timestamp': '2025-09-30 23:04:20.334400', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.407778', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.027555402368307114, 'timestamp': '2025-09-30 23:04:20.416804', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.479771', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.023568755015730858, 'timestamp': '2025-09-30 23:04:20.482771', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.546546', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.019940262660384178, 'timestamp': '2025-09-30 23:04:20.553830', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.634565', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.030506556853652, 'timestamp': '2025-09-30 23:04:20.638281', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.711213', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.09066752344369888, 'timestamp': '2025-09-30 23:04:20.719341', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.799704', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.05183682218194008, 'timestamp': '2025-09-30 23:04:20.812023', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:20.888850', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.018777860328555107, 'timestamp': '2025-09-30 23:04:20.901840', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:20.977148', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.0356762558221817, 'timestamp': '2025-09-30 23:04:20.989552', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:21.061493', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.016476120799779892, 'timestamp': '2025-09-30 23:04:21.076394', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:21.135306', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.017330212518572807, 'timestamp': '2025-09-30 23:04:21.139421', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:21.219721', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.05208304896950722, 'timestamp': '2025-09-30 23:04:21.236121', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:21.313299', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.047737594693899155, 'timestamp': '2025-09-30 23:04:21.321232', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:21.391041', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.03383076936006546, 'timestamp': '2025-09-30 23:04:21.399785', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:21.481497', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.009033141657710075, 'timestamp': '2025-09-30 23:04:21.490062', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:21.567830', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.010450074449181557, 'timestamp': '2025-09-30 23:04:21.585924', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:21.674727', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.0047785392962396145, 'timestamp': '2025-09-30 23:04:21.685836', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:04:21.768756', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.07820451259613037, 'timestamp': '2025-09-30 23:04:21.780808', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:21.864869', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.02433188259601593, 'timestamp': '2025-09-30 23:04:21.875922', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:21.965046', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.07644982635974884, 'timestamp': '2025-09-30 23:04:21.980445', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:22.068466', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.008189543150365353, 'timestamp': '2025-09-30 23:04:22.080945', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:22.162648', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.012906181626021862, 'timestamp': '2025-09-30 23:04:22.177695', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.264139', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.025811469182372093, 'timestamp': '2025-09-30 23:04:22.278940', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.367176', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.05145661160349846, 'timestamp': '2025-09-30 23:04:22.374314', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.441332', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.06936079263687134, 'timestamp': '2025-09-30 23:04:22.445399', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.531602', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.037387896329164505, 'timestamp': '2025-09-30 23:04:22.543484', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.633847', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.037107355892658234, 'timestamp': '2025-09-30 23:04:22.651192', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.738904', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.029844803735613823, 'timestamp': '2025-09-30 23:04:22.746876', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.823327', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.023104827851057053, 'timestamp': '2025-09-30 23:04:22.835227', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:22.924676', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.034521717578172684, 'timestamp': '2025-09-30 23:04:22.940428', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.027760', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.022802889347076416, 'timestamp': '2025-09-30 23:04:23.040243', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.120993', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.01970750093460083, 'timestamp': '2025-09-30 23:04:23.130266', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.190425', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.04345150664448738, 'timestamp': '2025-09-30 23:04:23.194067', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.256907', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.027254987508058548, 'timestamp': '2025-09-30 23:04:23.270495', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.360375', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.05067194625735283, 'timestamp': '2025-09-30 23:04:23.372689', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.437066', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.008826729841530323, 'timestamp': '2025-09-30 23:04:23.453246', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.533126', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.01262108888477087, 'timestamp': '2025-09-30 23:04:23.542421', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:23.634372', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.04343436285853386, 'timestamp': '2025-09-30 23:04:23.649525', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.712179', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.008369720540940762, 'timestamp': '2025-09-30 23:04:23.722561', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:23.829229', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.027892833575606346, 'timestamp': '2025-09-30 23:04:23.841059', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:23.928927', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.03344667702913284, 'timestamp': '2025-09-30 23:04:23.938061', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.023212', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.03380469232797623, 'timestamp': '2025-09-30 23:04:24.038378', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.132786', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.02722148597240448, 'timestamp': '2025-09-30 23:04:24.137249', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.225765', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.016925232484936714, 'timestamp': '2025-09-30 23:04:24.243604', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.319168', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.039226505905389786, 'timestamp': '2025-09-30 23:04:24.333211', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.424217', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.03310151770710945, 'timestamp': '2025-09-30 23:04:24.428307', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:24.526284', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.019024107605218887, 'timestamp': '2025-09-30 23:04:24.531599', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.611182', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.031447116285562515, 'timestamp': '2025-09-30 23:04:24.622644', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.698023', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.03965466096997261, 'timestamp': '2025-09-30 23:04:24.711250', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.803894', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.0023671265225857496, 'timestamp': '2025-09-30 23:04:24.809009', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:24.869643', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.015779754146933556, 'timestamp': '2025-09-30 23:04:24.874501', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:24.954597', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.03665176406502724, 'timestamp': '2025-09-30 23:04:24.970037', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.042345', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.016218580305576324, 'timestamp': '2025-09-30 23:04:25.056375', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.149933', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.008473178371787071, 'timestamp': '2025-09-30 23:04:25.152684', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:25.220134', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.04804113134741783, 'timestamp': '2025-09-30 23:04:25.225065', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.306364', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.036599040031433105, 'timestamp': '2025-09-30 23:04:25.322948', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.425812', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.011090234853327274, 'timestamp': '2025-09-30 23:04:25.440321', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.509999', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.05146531015634537, 'timestamp': '2025-09-30 23:04:25.515865', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.606584', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.014519753865897655, 'timestamp': '2025-09-30 23:04:25.619492', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.693034', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.0438239648938179, 'timestamp': '2025-09-30 23:04:25.701082', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:25.781990', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.03079064004123211, 'timestamp': '2025-09-30 23:04:25.795533', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.875500', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.04517802596092224, 'timestamp': '2025-09-30 23:04:25.887043', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:25.962285', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.02720719762146473, 'timestamp': '2025-09-30 23:04:25.974399', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:26.051890', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.025210173800587654, 'timestamp': '2025-09-30 23:04:26.059309', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.131127', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.014384311623871326, 'timestamp': '2025-09-30 23:04:26.143554', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.209216', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.006711958441883326, 'timestamp': '2025-09-30 23:04:26.220715', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.309732', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.0185315553098917, 'timestamp': '2025-09-30 23:04:26.313856', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.379871', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.024144459515810013, 'timestamp': '2025-09-30 23:04:26.393156', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:26.462206', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.02842162549495697, 'timestamp': '2025-09-30 23:04:26.468512', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.552138', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.021610330790281296, 'timestamp': '2025-09-30 23:04:26.564855', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.620381', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.014206422492861748, 'timestamp': '2025-09-30 23:04:26.624735', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.711323', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.03065248392522335, 'timestamp': '2025-09-30 23:04:26.725300', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.787729', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.026199501007795334, 'timestamp': '2025-09-30 23:04:26.796748', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.873138', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.07064885646104813, 'timestamp': '2025-09-30 23:04:26.881805', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:26.955536', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.026353081688284874, 'timestamp': '2025-09-30 23:04:26.963882', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.036160', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.03443275764584541, 'timestamp': '2025-09-30 23:04:27.050931', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.123246', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.01922372356057167, 'timestamp': '2025-09-30 23:04:27.130462', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.213206', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.07268321514129639, 'timestamp': '2025-09-30 23:04:27.224863', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.299907', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.007269646506756544, 'timestamp': '2025-09-30 23:04:27.311255', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.378465', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.012832279317080975, 'timestamp': '2025-09-30 23:04:27.395565', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.469375', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.005243746098130941, 'timestamp': '2025-09-30 23:04:27.480027', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.552152', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.022589877247810364, 'timestamp': '2025-09-30 23:04:27.555957', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.617076', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.027425531297922134, 'timestamp': '2025-09-30 23:04:27.621008', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.708010', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.04487255588173866, 'timestamp': '2025-09-30 23:04:27.726322', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.819803', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.02556195668876171, 'timestamp': '2025-09-30 23:04:27.832796', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.916664', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.011625675484538078, 'timestamp': '2025-09-30 23:04:27.926994', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:27.994081', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.021493470296263695, 'timestamp': '2025-09-30 23:04:27.997714', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.077014', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.005435366649180651, 'timestamp': '2025-09-30 23:04:28.095570', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.173996', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.04129749536514282, 'timestamp': '2025-09-30 23:04:28.178521', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.264319', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.01665012165904045, 'timestamp': '2025-09-30 23:04:28.269468', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.352963', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.039169758558273315, 'timestamp': '2025-09-30 23:04:28.366435', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.428024', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.04069286957383156, 'timestamp': '2025-09-30 23:04:28.442970', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.526224', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.036520328372716904, 'timestamp': '2025-09-30 23:04:28.529555', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.594066', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.04298189654946327, 'timestamp': '2025-09-30 23:04:28.605303', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.670731', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.050619594752788544, 'timestamp': '2025-09-30 23:04:28.680791', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:28.760161', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.04392337054014206, 'timestamp': '2025-09-30 23:04:28.773172', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:04:34.259512', 'step': 2432, 'epoch': 1} {'type': 'pplx', 'content': 5520180.20982813, 'timestamp': '2025-09-30 23:04:34.262707', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:34.324686', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.02858281135559082, 'timestamp': '2025-09-30 23:04:34.334537', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:34.421234', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.01647498831152916, 'timestamp': '2025-09-30 23:04:34.431159', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:34.523747', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.01564190723001957, 'timestamp': '2025-09-30 23:04:34.531656', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 80], 'flops': 400002507344.0}, 'timestamp': '2025-09-30 23:04:34.633400', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.01514936238527298, 'timestamp': '2025-09-30 23:04:34.646579', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:34.725400', 'step': 2436, 'epoch': 2} {'type': 'loss', 'content': 0.03748179227113724, 'timestamp': '2025-09-30 23:04:34.732620', 'step': 2437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:34.802384', 'step': 2437, 'epoch': 2} {'type': 'loss', 'content': 0.023955296725034714, 'timestamp': '2025-09-30 23:04:34.812998', 'step': 2438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:34.880510', 'step': 2438, 'epoch': 2} {'type': 'loss', 'content': 0.022072402760386467, 'timestamp': '2025-09-30 23:04:34.886109', 'step': 2439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:34.954892', 'step': 2439, 'epoch': 2} {'type': 'loss', 'content': 0.03631744533777237, 'timestamp': '2025-09-30 23:04:34.965991', 'step': 2440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.026176', 'step': 2440, 'epoch': 2} {'type': 'loss', 'content': 0.027954796329140663, 'timestamp': '2025-09-30 23:04:35.034132', 'step': 2441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.101574', 'step': 2441, 'epoch': 2} {'type': 'loss', 'content': 0.0047714486718177795, 'timestamp': '2025-09-30 23:04:35.107031', 'step': 2442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.180312', 'step': 2442, 'epoch': 2} {'type': 'loss', 'content': 0.02486480213701725, 'timestamp': '2025-09-30 23:04:35.189521', 'step': 2443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.268690', 'step': 2443, 'epoch': 2} {'type': 'loss', 'content': 0.0255562923848629, 'timestamp': '2025-09-30 23:04:35.284610', 'step': 2444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:35.365820', 'step': 2444, 'epoch': 2} {'type': 'loss', 'content': 0.02379547990858555, 'timestamp': '2025-09-30 23:04:35.370454', 'step': 2445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:35.447494', 'step': 2445, 'epoch': 2} {'type': 'loss', 'content': 0.024116117507219315, 'timestamp': '2025-09-30 23:04:35.453758', 'step': 2446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.517161', 'step': 2446, 'epoch': 2} {'type': 'loss', 'content': 0.03620293363928795, 'timestamp': '2025-09-30 23:04:35.520198', 'step': 2447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.590703', 'step': 2447, 'epoch': 2} {'type': 'loss', 'content': 0.030035363510251045, 'timestamp': '2025-09-30 23:04:35.602548', 'step': 2448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:35.673595', 'step': 2448, 'epoch': 2} {'type': 'loss', 'content': 0.0397552065551281, 'timestamp': '2025-09-30 23:04:35.676385', 'step': 2449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.736479', 'step': 2449, 'epoch': 2} {'type': 'loss', 'content': 0.023804843425750732, 'timestamp': '2025-09-30 23:04:35.742895', 'step': 2450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.827460', 'step': 2450, 'epoch': 2} {'type': 'loss', 'content': 0.0178503580391407, 'timestamp': '2025-09-30 23:04:35.831029', 'step': 2451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.899709', 'step': 2451, 'epoch': 2} {'type': 'loss', 'content': 0.040238384157419205, 'timestamp': '2025-09-30 23:04:35.908271', 'step': 2452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:35.973928', 'step': 2452, 'epoch': 2} {'type': 'loss', 'content': 0.0171901173889637, 'timestamp': '2025-09-30 23:04:35.981990', 'step': 2453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:36.039886', 'step': 2453, 'epoch': 2} {'type': 'loss', 'content': 0.010318133048713207, 'timestamp': '2025-09-30 23:04:36.044914', 'step': 2454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.105128', 'step': 2454, 'epoch': 2} {'type': 'loss', 'content': 0.03089836798608303, 'timestamp': '2025-09-30 23:04:36.109621', 'step': 2455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.175261', 'step': 2455, 'epoch': 2} {'type': 'loss', 'content': 0.020785251632332802, 'timestamp': '2025-09-30 23:04:36.182059', 'step': 2456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:36.242165', 'step': 2456, 'epoch': 2} {'type': 'loss', 'content': 0.013983592391014099, 'timestamp': '2025-09-30 23:04:36.248200', 'step': 2457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.315331', 'step': 2457, 'epoch': 2} {'type': 'loss', 'content': 0.029160652309656143, 'timestamp': '2025-09-30 23:04:36.319355', 'step': 2458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.382604', 'step': 2458, 'epoch': 2} {'type': 'loss', 'content': 0.0471518412232399, 'timestamp': '2025-09-30 23:04:36.388955', 'step': 2459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.445649', 'step': 2459, 'epoch': 2} {'type': 'loss', 'content': 0.012613654136657715, 'timestamp': '2025-09-30 23:04:36.460465', 'step': 2460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.537221', 'step': 2460, 'epoch': 2} {'type': 'loss', 'content': 0.019797934219241142, 'timestamp': '2025-09-30 23:04:36.541984', 'step': 2461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.618072', 'step': 2461, 'epoch': 2} {'type': 'loss', 'content': 0.028778521344065666, 'timestamp': '2025-09-30 23:04:36.622609', 'step': 2462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:36.689704', 'step': 2462, 'epoch': 2} {'type': 'loss', 'content': 0.004331079311668873, 'timestamp': '2025-09-30 23:04:36.692622', 'step': 2463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.748298', 'step': 2463, 'epoch': 2} {'type': 'loss', 'content': 0.02558368630707264, 'timestamp': '2025-09-30 23:04:36.757880', 'step': 2464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.816638', 'step': 2464, 'epoch': 2} {'type': 'loss', 'content': 0.0022440110333263874, 'timestamp': '2025-09-30 23:04:36.821993', 'step': 2465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.887812', 'step': 2465, 'epoch': 2} {'type': 'loss', 'content': 0.03196746110916138, 'timestamp': '2025-09-30 23:04:36.895859', 'step': 2466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:36.964478', 'step': 2466, 'epoch': 2} {'type': 'loss', 'content': 0.036870602518320084, 'timestamp': '2025-09-30 23:04:36.968811', 'step': 2467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.038614', 'step': 2467, 'epoch': 2} {'type': 'loss', 'content': 0.029536621645092964, 'timestamp': '2025-09-30 23:04:37.048615', 'step': 2468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.108842', 'step': 2468, 'epoch': 2} {'type': 'loss', 'content': 0.04718181863427162, 'timestamp': '2025-09-30 23:04:37.115298', 'step': 2469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:37.182718', 'step': 2469, 'epoch': 2} {'type': 'loss', 'content': 0.030167488381266594, 'timestamp': '2025-09-30 23:04:37.190824', 'step': 2470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.259730', 'step': 2470, 'epoch': 2} {'type': 'loss', 'content': 0.014851344749331474, 'timestamp': '2025-09-30 23:04:37.264242', 'step': 2471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.325121', 'step': 2471, 'epoch': 2} {'type': 'loss', 'content': 0.04011405631899834, 'timestamp': '2025-09-30 23:04:37.332866', 'step': 2472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.394881', 'step': 2472, 'epoch': 2} {'type': 'loss', 'content': 0.016647230833768845, 'timestamp': '2025-09-30 23:04:37.401643', 'step': 2473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.462814', 'step': 2473, 'epoch': 2} {'type': 'loss', 'content': 0.01997121050953865, 'timestamp': '2025-09-30 23:04:37.470440', 'step': 2474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.544705', 'step': 2474, 'epoch': 2} {'type': 'loss', 'content': 0.006017040461301804, 'timestamp': '2025-09-30 23:04:37.557959', 'step': 2475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.649839', 'step': 2475, 'epoch': 2} {'type': 'loss', 'content': 0.014905895106494427, 'timestamp': '2025-09-30 23:04:37.662109', 'step': 2476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.731154', 'step': 2476, 'epoch': 2} {'type': 'loss', 'content': 0.036130141466856, 'timestamp': '2025-09-30 23:04:37.735930', 'step': 2477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:37.794441', 'step': 2477, 'epoch': 2} {'type': 'loss', 'content': 0.007345862686634064, 'timestamp': '2025-09-30 23:04:37.801712', 'step': 2478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.862750', 'step': 2478, 'epoch': 2} {'type': 'loss', 'content': 0.005803587846457958, 'timestamp': '2025-09-30 23:04:37.871143', 'step': 2479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:37.933211', 'step': 2479, 'epoch': 2} {'type': 'loss', 'content': 0.05288049578666687, 'timestamp': '2025-09-30 23:04:37.940958', 'step': 2480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.002567', 'step': 2480, 'epoch': 2} {'type': 'loss', 'content': 0.009808054193854332, 'timestamp': '2025-09-30 23:04:38.010082', 'step': 2481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:38.079624', 'step': 2481, 'epoch': 2} {'type': 'loss', 'content': 0.023977169767022133, 'timestamp': '2025-09-30 23:04:38.086919', 'step': 2482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.161012', 'step': 2482, 'epoch': 2} {'type': 'loss', 'content': 0.008124719373881817, 'timestamp': '2025-09-30 23:04:38.163708', 'step': 2483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.230165', 'step': 2483, 'epoch': 2} {'type': 'loss', 'content': 0.04039223864674568, 'timestamp': '2025-09-30 23:04:38.243225', 'step': 2484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.309851', 'step': 2484, 'epoch': 2} {'type': 'loss', 'content': 0.06222536042332649, 'timestamp': '2025-09-30 23:04:38.312316', 'step': 2485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:38.368954', 'step': 2485, 'epoch': 2} {'type': 'loss', 'content': 0.025637522339820862, 'timestamp': '2025-09-30 23:04:38.372417', 'step': 2486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:38.429215', 'step': 2486, 'epoch': 2} {'type': 'loss', 'content': 0.05742431432008743, 'timestamp': '2025-09-30 23:04:38.433314', 'step': 2487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:04:38.488272', 'step': 2487, 'epoch': 2} {'type': 'loss', 'content': 0.014571554027497768, 'timestamp': '2025-09-30 23:04:38.495296', 'step': 2488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:38.552189', 'step': 2488, 'epoch': 2} {'type': 'loss', 'content': 0.06983724981546402, 'timestamp': '2025-09-30 23:04:38.555241', 'step': 2489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.614074', 'step': 2489, 'epoch': 2} {'type': 'loss', 'content': 0.00984251219779253, 'timestamp': '2025-09-30 23:04:38.621246', 'step': 2490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.689430', 'step': 2490, 'epoch': 2} {'type': 'loss', 'content': 0.021823685616254807, 'timestamp': '2025-09-30 23:04:38.691762', 'step': 2491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:38.747687', 'step': 2491, 'epoch': 2} {'type': 'loss', 'content': 0.03700428456068039, 'timestamp': '2025-09-30 23:04:38.754499', 'step': 2492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.815889', 'step': 2492, 'epoch': 2} {'type': 'loss', 'content': 0.014213980175554752, 'timestamp': '2025-09-30 23:04:38.819825', 'step': 2493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:38.878567', 'step': 2493, 'epoch': 2} {'type': 'loss', 'content': 0.0263185054063797, 'timestamp': '2025-09-30 23:04:38.883154', 'step': 2494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:38.950229', 'step': 2494, 'epoch': 2} {'type': 'loss', 'content': 0.0036907377652823925, 'timestamp': '2025-09-30 23:04:38.955950', 'step': 2495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:39.035734', 'step': 2495, 'epoch': 2} {'type': 'loss', 'content': 0.07209312170743942, 'timestamp': '2025-09-30 23:04:39.051561', 'step': 2496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:39.122101', 'step': 2496, 'epoch': 2} {'type': 'loss', 'content': 0.01757369004189968, 'timestamp': '2025-09-30 23:04:39.130759', 'step': 2497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:39.188995', 'step': 2497, 'epoch': 2} {'type': 'loss', 'content': 0.021802261471748352, 'timestamp': '2025-09-30 23:04:39.195755', 'step': 2498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:39.270065', 'step': 2498, 'epoch': 2} {'type': 'loss', 'content': 0.03677591681480408, 'timestamp': '2025-09-30 23:04:39.273442', 'step': 2499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:39.350596', 'step': 2499, 'epoch': 2} {'type': 'loss', 'content': 0.027654411271214485, 'timestamp': '2025-09-30 23:04:39.359276', 'step': 2500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-30 23:04:39.876672', 'step': 2500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:39.944941', 'step': 2500, 'epoch': 2} {'type': 'loss', 'content': 0.043197065591812134, 'timestamp': '2025-09-30 23:04:39.952417', 'step': 2501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.018004', 'step': 2501, 'epoch': 2} {'type': 'loss', 'content': 0.018434567376971245, 'timestamp': '2025-09-30 23:04:40.020975', 'step': 2502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.088709', 'step': 2502, 'epoch': 2} {'type': 'loss', 'content': 0.013598998077213764, 'timestamp': '2025-09-30 23:04:40.100576', 'step': 2503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.164657', 'step': 2503, 'epoch': 2} {'type': 'loss', 'content': 0.02147083915770054, 'timestamp': '2025-09-30 23:04:40.172922', 'step': 2504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:40.242753', 'step': 2504, 'epoch': 2} {'type': 'loss', 'content': 0.0470912791788578, 'timestamp': '2025-09-30 23:04:40.247355', 'step': 2505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.319203', 'step': 2505, 'epoch': 2} {'type': 'loss', 'content': 0.01778932847082615, 'timestamp': '2025-09-30 23:04:40.331035', 'step': 2506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.400263', 'step': 2506, 'epoch': 2} {'type': 'loss', 'content': 0.025986867025494576, 'timestamp': '2025-09-30 23:04:40.408223', 'step': 2507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:40.473509', 'step': 2507, 'epoch': 2} {'type': 'loss', 'content': 0.01521240919828415, 'timestamp': '2025-09-30 23:04:40.485652', 'step': 2508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.559115', 'step': 2508, 'epoch': 2} {'type': 'loss', 'content': 0.03090648166835308, 'timestamp': '2025-09-30 23:04:40.561867', 'step': 2509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.629072', 'step': 2509, 'epoch': 2} {'type': 'loss', 'content': 0.028103481978178024, 'timestamp': '2025-09-30 23:04:40.634388', 'step': 2510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:40.705136', 'step': 2510, 'epoch': 2} {'type': 'loss', 'content': 0.04618477448821068, 'timestamp': '2025-09-30 23:04:40.708968', 'step': 2511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.774522', 'step': 2511, 'epoch': 2} {'type': 'loss', 'content': 0.01454758457839489, 'timestamp': '2025-09-30 23:04:40.782024', 'step': 2512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.846636', 'step': 2512, 'epoch': 2} {'type': 'loss', 'content': 0.03996746987104416, 'timestamp': '2025-09-30 23:04:40.860358', 'step': 2513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:40.934112', 'step': 2513, 'epoch': 2} {'type': 'loss', 'content': 0.03821779042482376, 'timestamp': '2025-09-30 23:04:40.940533', 'step': 2514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:41.014809', 'step': 2514, 'epoch': 2} {'type': 'loss', 'content': 0.04731085151433945, 'timestamp': '2025-09-30 23:04:41.022237', 'step': 2515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.101463', 'step': 2515, 'epoch': 2} {'type': 'loss', 'content': 0.009329461492598057, 'timestamp': '2025-09-30 23:04:41.111630', 'step': 2516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:04:41.189190', 'step': 2516, 'epoch': 2} {'type': 'loss', 'content': 0.019617481157183647, 'timestamp': '2025-09-30 23:04:41.206872', 'step': 2517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.294405', 'step': 2517, 'epoch': 2} {'type': 'loss', 'content': 0.06975273042917252, 'timestamp': '2025-09-30 23:04:41.301049', 'step': 2518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.388496', 'step': 2518, 'epoch': 2} {'type': 'loss', 'content': 0.024551505222916603, 'timestamp': '2025-09-30 23:04:41.402468', 'step': 2519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.471601', 'step': 2519, 'epoch': 2} {'type': 'loss', 'content': 0.03996415063738823, 'timestamp': '2025-09-30 23:04:41.481842', 'step': 2520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.553679', 'step': 2520, 'epoch': 2} {'type': 'loss', 'content': 0.028550541028380394, 'timestamp': '2025-09-30 23:04:41.563724', 'step': 2521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.633025', 'step': 2521, 'epoch': 2} {'type': 'loss', 'content': 0.03462577983736992, 'timestamp': '2025-09-30 23:04:41.640900', 'step': 2522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.721165', 'step': 2522, 'epoch': 2} {'type': 'loss', 'content': 0.024189108982682228, 'timestamp': '2025-09-30 23:04:41.723715', 'step': 2523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.789580', 'step': 2523, 'epoch': 2} {'type': 'loss', 'content': 0.023334315046668053, 'timestamp': '2025-09-30 23:04:41.797574', 'step': 2524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:41.854373', 'step': 2524, 'epoch': 2} {'type': 'loss', 'content': 0.009936700575053692, 'timestamp': '2025-09-30 23:04:41.861427', 'step': 2525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:41.925298', 'step': 2525, 'epoch': 2} {'type': 'loss', 'content': 0.028782619163393974, 'timestamp': '2025-09-30 23:04:41.929814', 'step': 2526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:41.986718', 'step': 2526, 'epoch': 2} {'type': 'loss', 'content': 0.037709031254053116, 'timestamp': '2025-09-30 23:04:41.992792', 'step': 2527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.058355', 'step': 2527, 'epoch': 2} {'type': 'loss', 'content': 0.018815794959664345, 'timestamp': '2025-09-30 23:04:42.077423', 'step': 2528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.163306', 'step': 2528, 'epoch': 2} {'type': 'loss', 'content': 0.026394622400403023, 'timestamp': '2025-09-30 23:04:42.179035', 'step': 2529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.255921', 'step': 2529, 'epoch': 2} {'type': 'loss', 'content': 0.02407309226691723, 'timestamp': '2025-09-30 23:04:42.268200', 'step': 2530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.335155', 'step': 2530, 'epoch': 2} {'type': 'loss', 'content': 0.05361609533429146, 'timestamp': '2025-09-30 23:04:42.344987', 'step': 2531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.415095', 'step': 2531, 'epoch': 2} {'type': 'loss', 'content': 0.015915144234895706, 'timestamp': '2025-09-30 23:04:42.422354', 'step': 2532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:42.509339', 'step': 2532, 'epoch': 2} {'type': 'loss', 'content': 0.026888491585850716, 'timestamp': '2025-09-30 23:04:42.514993', 'step': 2533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.573434', 'step': 2533, 'epoch': 2} {'type': 'loss', 'content': 0.010768184438347816, 'timestamp': '2025-09-30 23:04:42.581591', 'step': 2534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.646301', 'step': 2534, 'epoch': 2} {'type': 'loss', 'content': 0.02932138368487358, 'timestamp': '2025-09-30 23:04:42.654009', 'step': 2535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:42.727112', 'step': 2535, 'epoch': 2} {'type': 'loss', 'content': 0.023149151355028152, 'timestamp': '2025-09-30 23:04:42.736122', 'step': 2536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.809443', 'step': 2536, 'epoch': 2} {'type': 'loss', 'content': 0.03209424763917923, 'timestamp': '2025-09-30 23:04:42.814766', 'step': 2537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:42.890822', 'step': 2537, 'epoch': 2} {'type': 'loss', 'content': 0.03130074217915535, 'timestamp': '2025-09-30 23:04:42.894121', 'step': 2538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:42.971815', 'step': 2538, 'epoch': 2} {'type': 'loss', 'content': 0.025537734851241112, 'timestamp': '2025-09-30 23:04:42.985433', 'step': 2539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.052858', 'step': 2539, 'epoch': 2} {'type': 'loss', 'content': 0.010549122467637062, 'timestamp': '2025-09-30 23:04:43.061002', 'step': 2540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:43.120056', 'step': 2540, 'epoch': 2} {'type': 'loss', 'content': 0.005249322392046452, 'timestamp': '2025-09-30 23:04:43.124307', 'step': 2541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.200935', 'step': 2541, 'epoch': 2} {'type': 'loss', 'content': 0.009214838035404682, 'timestamp': '2025-09-30 23:04:43.206441', 'step': 2542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.280974', 'step': 2542, 'epoch': 2} {'type': 'loss', 'content': 0.03272704407572746, 'timestamp': '2025-09-30 23:04:43.290292', 'step': 2543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.359383', 'step': 2543, 'epoch': 2} {'type': 'loss', 'content': 0.024039609357714653, 'timestamp': '2025-09-30 23:04:43.370008', 'step': 2544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.431268', 'step': 2544, 'epoch': 2} {'type': 'loss', 'content': 0.005516710225492716, 'timestamp': '2025-09-30 23:04:43.436180', 'step': 2545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.500399', 'step': 2545, 'epoch': 2} {'type': 'loss', 'content': 0.017397787421941757, 'timestamp': '2025-09-30 23:04:43.504106', 'step': 2546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:43.569407', 'step': 2546, 'epoch': 2} {'type': 'loss', 'content': 0.05650252848863602, 'timestamp': '2025-09-30 23:04:43.573377', 'step': 2547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.649003', 'step': 2547, 'epoch': 2} {'type': 'loss', 'content': 0.021797586232423782, 'timestamp': '2025-09-30 23:04:43.658559', 'step': 2548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.730550', 'step': 2548, 'epoch': 2} {'type': 'loss', 'content': 0.036578528583049774, 'timestamp': '2025-09-30 23:04:43.747255', 'step': 2549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:43.806760', 'step': 2549, 'epoch': 2} {'type': 'loss', 'content': 0.01077184733003378, 'timestamp': '2025-09-30 23:04:43.811045', 'step': 2550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.872607', 'step': 2550, 'epoch': 2} {'type': 'loss', 'content': 0.050834186375141144, 'timestamp': '2025-09-30 23:04:43.875987', 'step': 2551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:43.944175', 'step': 2551, 'epoch': 2} {'type': 'loss', 'content': 0.018154779449105263, 'timestamp': '2025-09-30 23:04:43.950272', 'step': 2552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:44.018549', 'step': 2552, 'epoch': 2} {'type': 'loss', 'content': 0.05536854267120361, 'timestamp': '2025-09-30 23:04:44.023990', 'step': 2553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.083921', 'step': 2553, 'epoch': 2} {'type': 'loss', 'content': 0.02292168140411377, 'timestamp': '2025-09-30 23:04:44.088609', 'step': 2554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.165316', 'step': 2554, 'epoch': 2} {'type': 'loss', 'content': 0.005743970163166523, 'timestamp': '2025-09-30 23:04:44.174147', 'step': 2555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:44.243072', 'step': 2555, 'epoch': 2} {'type': 'loss', 'content': 0.017300019040703773, 'timestamp': '2025-09-30 23:04:44.262049', 'step': 2556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.343806', 'step': 2556, 'epoch': 2} {'type': 'loss', 'content': 0.003628707956522703, 'timestamp': '2025-09-30 23:04:44.362672', 'step': 2557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.426967', 'step': 2557, 'epoch': 2} {'type': 'loss', 'content': 0.010700762271881104, 'timestamp': '2025-09-30 23:04:44.430431', 'step': 2558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.495718', 'step': 2558, 'epoch': 2} {'type': 'loss', 'content': 0.04165082797408104, 'timestamp': '2025-09-30 23:04:44.507272', 'step': 2559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.569384', 'step': 2559, 'epoch': 2} {'type': 'loss', 'content': 0.034807540476322174, 'timestamp': '2025-09-30 23:04:44.575687', 'step': 2560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.641344', 'step': 2560, 'epoch': 2} {'type': 'loss', 'content': 0.05255342647433281, 'timestamp': '2025-09-30 23:04:44.645281', 'step': 2561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.703509', 'step': 2561, 'epoch': 2} {'type': 'loss', 'content': 0.004581231623888016, 'timestamp': '2025-09-30 23:04:44.706217', 'step': 2562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.772635', 'step': 2562, 'epoch': 2} {'type': 'loss', 'content': 0.05045652016997337, 'timestamp': '2025-09-30 23:04:44.778300', 'step': 2563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.851999', 'step': 2563, 'epoch': 2} {'type': 'loss', 'content': 0.02443154715001583, 'timestamp': '2025-09-30 23:04:44.863437', 'step': 2564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:44.926610', 'step': 2564, 'epoch': 2} {'type': 'loss', 'content': 0.01776694320142269, 'timestamp': '2025-09-30 23:04:44.929565', 'step': 2565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.002313', 'step': 2565, 'epoch': 2} {'type': 'loss', 'content': 0.003834755392745137, 'timestamp': '2025-09-30 23:04:45.014720', 'step': 2566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:45.091476', 'step': 2566, 'epoch': 2} {'type': 'loss', 'content': 0.02140454575419426, 'timestamp': '2025-09-30 23:04:45.102737', 'step': 2567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.194573', 'step': 2567, 'epoch': 2} {'type': 'loss', 'content': 0.013651342131197453, 'timestamp': '2025-09-30 23:04:45.203449', 'step': 2568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.272993', 'step': 2568, 'epoch': 2} {'type': 'loss', 'content': 0.010944612324237823, 'timestamp': '2025-09-30 23:04:45.277465', 'step': 2569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.347418', 'step': 2569, 'epoch': 2} {'type': 'loss', 'content': 0.0005529354675672948, 'timestamp': '2025-09-30 23:04:45.352886', 'step': 2570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.416831', 'step': 2570, 'epoch': 2} {'type': 'loss', 'content': 0.014182590879499912, 'timestamp': '2025-09-30 23:04:45.426588', 'step': 2571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.500601', 'step': 2571, 'epoch': 2} {'type': 'loss', 'content': 0.005459524225443602, 'timestamp': '2025-09-30 23:04:45.512956', 'step': 2572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:45.589064', 'step': 2572, 'epoch': 2} {'type': 'loss', 'content': 0.031338952481746674, 'timestamp': '2025-09-30 23:04:45.593044', 'step': 2573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.653320', 'step': 2573, 'epoch': 2} {'type': 'loss', 'content': 0.008795064873993397, 'timestamp': '2025-09-30 23:04:45.657732', 'step': 2574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:45.720314', 'step': 2574, 'epoch': 2} {'type': 'loss', 'content': 0.010949521325528622, 'timestamp': '2025-09-30 23:04:45.733236', 'step': 2575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.803315', 'step': 2575, 'epoch': 2} {'type': 'loss', 'content': 0.003898575436323881, 'timestamp': '2025-09-30 23:04:45.816448', 'step': 2576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.886898', 'step': 2576, 'epoch': 2} {'type': 'loss', 'content': 0.005937499459832907, 'timestamp': '2025-09-30 23:04:45.890669', 'step': 2577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:45.963515', 'step': 2577, 'epoch': 2} {'type': 'loss', 'content': 0.047584325075149536, 'timestamp': '2025-09-30 23:04:45.980213', 'step': 2578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:46.064278', 'step': 2578, 'epoch': 2} {'type': 'loss', 'content': 0.001951989601366222, 'timestamp': '2025-09-30 23:04:46.068030', 'step': 2579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:46.150122', 'step': 2579, 'epoch': 2} {'type': 'loss', 'content': 0.030066296458244324, 'timestamp': '2025-09-30 23:04:46.161330', 'step': 2580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:46.237248', 'step': 2580, 'epoch': 2} {'type': 'loss', 'content': 0.016550874337553978, 'timestamp': '2025-09-30 23:04:46.241653', 'step': 2581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:46.304525', 'step': 2581, 'epoch': 2} {'type': 'loss', 'content': 0.020321514457464218, 'timestamp': '2025-09-30 23:04:46.309033', 'step': 2582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:46.366464', 'step': 2582, 'epoch': 2} {'type': 'loss', 'content': 0.037910543382167816, 'timestamp': '2025-09-30 23:04:46.371511', 'step': 2583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:46.461818', 'step': 2583, 'epoch': 2} {'type': 'loss', 'content': 0.003070892533287406, 'timestamp': '2025-09-30 23:04:46.484476', 'step': 2584, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:04:51.668839', 'step': 2584, 'epoch': 2} {'type': 'pplx', 'content': 7833877.116827345, 'timestamp': '2025-09-30 23:04:51.684685', 'step': 2584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:51.756840', 'step': 2584, 'epoch': 2} {'type': 'loss', 'content': 0.013471334241330624, 'timestamp': '2025-09-30 23:04:51.769271', 'step': 2585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:51.864525', 'step': 2585, 'epoch': 2} {'type': 'loss', 'content': 0.014409610070288181, 'timestamp': '2025-09-30 23:04:51.868850', 'step': 2586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:51.964796', 'step': 2586, 'epoch': 2} {'type': 'loss', 'content': 0.020833294838666916, 'timestamp': '2025-09-30 23:04:51.978863', 'step': 2587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.075842', 'step': 2587, 'epoch': 2} {'type': 'loss', 'content': 0.013057379052042961, 'timestamp': '2025-09-30 23:04:52.082793', 'step': 2588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.173977', 'step': 2588, 'epoch': 2} {'type': 'loss', 'content': 0.04552825912833214, 'timestamp': '2025-09-30 23:04:52.190949', 'step': 2589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.276538', 'step': 2589, 'epoch': 2} {'type': 'loss', 'content': 0.03352271392941475, 'timestamp': '2025-09-30 23:04:52.281823', 'step': 2590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:52.361195', 'step': 2590, 'epoch': 2} {'type': 'loss', 'content': 0.01743495650589466, 'timestamp': '2025-09-30 23:04:52.372640', 'step': 2591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.444557', 'step': 2591, 'epoch': 2} {'type': 'loss', 'content': 0.030223742127418518, 'timestamp': '2025-09-30 23:04:52.452781', 'step': 2592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:52.518711', 'step': 2592, 'epoch': 2} {'type': 'loss', 'content': 0.005190289579331875, 'timestamp': '2025-09-30 23:04:52.526490', 'step': 2593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:52.600600', 'step': 2593, 'epoch': 2} {'type': 'loss', 'content': 0.0013935599708929658, 'timestamp': '2025-09-30 23:04:52.605585', 'step': 2594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.661633', 'step': 2594, 'epoch': 2} {'type': 'loss', 'content': 0.005417599808424711, 'timestamp': '2025-09-30 23:04:52.670372', 'step': 2595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.756961', 'step': 2595, 'epoch': 2} {'type': 'loss', 'content': 0.05060914158821106, 'timestamp': '2025-09-30 23:04:52.774953', 'step': 2596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.855757', 'step': 2596, 'epoch': 2} {'type': 'loss', 'content': 0.007900885306298733, 'timestamp': '2025-09-30 23:04:52.871330', 'step': 2597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:52.944216', 'step': 2597, 'epoch': 2} {'type': 'loss', 'content': 0.011640098877251148, 'timestamp': '2025-09-30 23:04:52.948632', 'step': 2598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.030168', 'step': 2598, 'epoch': 2} {'type': 'loss', 'content': 0.008845939300954342, 'timestamp': '2025-09-30 23:04:53.039898', 'step': 2599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.120634', 'step': 2599, 'epoch': 2} {'type': 'loss', 'content': 0.018952626734972, 'timestamp': '2025-09-30 23:04:53.127731', 'step': 2600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:53.196127', 'step': 2600, 'epoch': 2} {'type': 'loss', 'content': 0.01001193467527628, 'timestamp': '2025-09-30 23:04:53.206502', 'step': 2601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:53.264767', 'step': 2601, 'epoch': 2} {'type': 'loss', 'content': 0.024688662961125374, 'timestamp': '2025-09-30 23:04:53.273719', 'step': 2602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.350423', 'step': 2602, 'epoch': 2} {'type': 'loss', 'content': 0.02688509039580822, 'timestamp': '2025-09-30 23:04:53.353718', 'step': 2603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:53.423946', 'step': 2603, 'epoch': 2} {'type': 'loss', 'content': 0.01984052173793316, 'timestamp': '2025-09-30 23:04:53.437635', 'step': 2604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:53.502681', 'step': 2604, 'epoch': 2} {'type': 'loss', 'content': 0.05766945332288742, 'timestamp': '2025-09-30 23:04:53.507327', 'step': 2605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.599425', 'step': 2605, 'epoch': 2} {'type': 'loss', 'content': 0.013490316458046436, 'timestamp': '2025-09-30 23:04:53.613538', 'step': 2606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.679746', 'step': 2606, 'epoch': 2} {'type': 'loss', 'content': 0.031110526993870735, 'timestamp': '2025-09-30 23:04:53.691111', 'step': 2607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:53.763256', 'step': 2607, 'epoch': 2} {'type': 'loss', 'content': 0.01368033792823553, 'timestamp': '2025-09-30 23:04:53.771479', 'step': 2608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.848360', 'step': 2608, 'epoch': 2} {'type': 'loss', 'content': 0.0194257739931345, 'timestamp': '2025-09-30 23:04:53.852368', 'step': 2609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.913039', 'step': 2609, 'epoch': 2} {'type': 'loss', 'content': 0.002062064129859209, 'timestamp': '2025-09-30 23:04:53.916993', 'step': 2610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:53.976404', 'step': 2610, 'epoch': 2} {'type': 'loss', 'content': 0.03837961331009865, 'timestamp': '2025-09-30 23:04:53.979547', 'step': 2611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.055788', 'step': 2611, 'epoch': 2} {'type': 'loss', 'content': 0.0019808632787317038, 'timestamp': '2025-09-30 23:04:54.067723', 'step': 2612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.131603', 'step': 2612, 'epoch': 2} {'type': 'loss', 'content': 0.02190151996910572, 'timestamp': '2025-09-30 23:04:54.146580', 'step': 2613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.213894', 'step': 2613, 'epoch': 2} {'type': 'loss', 'content': 0.007051683031022549, 'timestamp': '2025-09-30 23:04:54.222186', 'step': 2614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.287588', 'step': 2614, 'epoch': 2} {'type': 'loss', 'content': 0.0017056518699973822, 'timestamp': '2025-09-30 23:04:54.300091', 'step': 2615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.393813', 'step': 2615, 'epoch': 2} {'type': 'loss', 'content': 0.013007411733269691, 'timestamp': '2025-09-30 23:04:54.405790', 'step': 2616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:54.475085', 'step': 2616, 'epoch': 2} {'type': 'loss', 'content': 0.0030193112324923277, 'timestamp': '2025-09-30 23:04:54.485041', 'step': 2617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.559894', 'step': 2617, 'epoch': 2} {'type': 'loss', 'content': 0.007124889176338911, 'timestamp': '2025-09-30 23:04:54.570343', 'step': 2618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.661521', 'step': 2618, 'epoch': 2} {'type': 'loss', 'content': 0.01693459413945675, 'timestamp': '2025-09-30 23:04:54.671924', 'step': 2619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:54.755007', 'step': 2619, 'epoch': 2} {'type': 'loss', 'content': 0.028724903240799904, 'timestamp': '2025-09-30 23:04:54.769434', 'step': 2620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.861870', 'step': 2620, 'epoch': 2} {'type': 'loss', 'content': 0.016828831285238266, 'timestamp': '2025-09-30 23:04:54.884473', 'step': 2621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:54.988996', 'step': 2621, 'epoch': 2} {'type': 'loss', 'content': 0.003758403705433011, 'timestamp': '2025-09-30 23:04:54.995129', 'step': 2622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.085969', 'step': 2622, 'epoch': 2} {'type': 'loss', 'content': 0.0034280498512089252, 'timestamp': '2025-09-30 23:04:55.103262', 'step': 2623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.184563', 'step': 2623, 'epoch': 2} {'type': 'loss', 'content': 0.074277862906456, 'timestamp': '2025-09-30 23:04:55.195962', 'step': 2624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.258735', 'step': 2624, 'epoch': 2} {'type': 'loss', 'content': 0.0055330172181129456, 'timestamp': '2025-09-30 23:04:55.267218', 'step': 2625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.340789', 'step': 2625, 'epoch': 2} {'type': 'loss', 'content': 0.013612084090709686, 'timestamp': '2025-09-30 23:04:55.348820', 'step': 2626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.417537', 'step': 2626, 'epoch': 2} {'type': 'loss', 'content': 0.054695386439561844, 'timestamp': '2025-09-30 23:04:55.429635', 'step': 2627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.501025', 'step': 2627, 'epoch': 2} {'type': 'loss', 'content': 0.021953163668513298, 'timestamp': '2025-09-30 23:04:55.517226', 'step': 2628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.593507', 'step': 2628, 'epoch': 2} {'type': 'loss', 'content': 0.044360797852277756, 'timestamp': '2025-09-30 23:04:55.607888', 'step': 2629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.687334', 'step': 2629, 'epoch': 2} {'type': 'loss', 'content': 0.02605578675866127, 'timestamp': '2025-09-30 23:04:55.693350', 'step': 2630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.751813', 'step': 2630, 'epoch': 2} {'type': 'loss', 'content': 0.023538410663604736, 'timestamp': '2025-09-30 23:04:55.757022', 'step': 2631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.821686', 'step': 2631, 'epoch': 2} {'type': 'loss', 'content': 0.009497850202023983, 'timestamp': '2025-09-30 23:04:55.831862', 'step': 2632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:55.897719', 'step': 2632, 'epoch': 2} {'type': 'loss', 'content': 0.026093313470482826, 'timestamp': '2025-09-30 23:04:55.905806', 'step': 2633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:55.974851', 'step': 2633, 'epoch': 2} {'type': 'loss', 'content': 0.04802700877189636, 'timestamp': '2025-09-30 23:04:55.988261', 'step': 2634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.074502', 'step': 2634, 'epoch': 2} {'type': 'loss', 'content': 0.05500922352075577, 'timestamp': '2025-09-30 23:04:56.077716', 'step': 2635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.149506', 'step': 2635, 'epoch': 2} {'type': 'loss', 'content': 0.05395681411027908, 'timestamp': '2025-09-30 23:04:56.162171', 'step': 2636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.248296', 'step': 2636, 'epoch': 2} {'type': 'loss', 'content': 0.006322006694972515, 'timestamp': '2025-09-30 23:04:56.255996', 'step': 2637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:56.322325', 'step': 2637, 'epoch': 2} {'type': 'loss', 'content': 0.02081182412803173, 'timestamp': '2025-09-30 23:04:56.326428', 'step': 2638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.389902', 'step': 2638, 'epoch': 2} {'type': 'loss', 'content': 0.03219464793801308, 'timestamp': '2025-09-30 23:04:56.396767', 'step': 2639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.461997', 'step': 2639, 'epoch': 2} {'type': 'loss', 'content': 0.007743940223008394, 'timestamp': '2025-09-30 23:04:56.476769', 'step': 2640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.559866', 'step': 2640, 'epoch': 2} {'type': 'loss', 'content': 0.00862525962293148, 'timestamp': '2025-09-30 23:04:56.565385', 'step': 2641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.625115', 'step': 2641, 'epoch': 2} {'type': 'loss', 'content': 0.027230406180024147, 'timestamp': '2025-09-30 23:04:56.631851', 'step': 2642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.696542', 'step': 2642, 'epoch': 2} {'type': 'loss', 'content': 0.030938534066081047, 'timestamp': '2025-09-30 23:04:56.707203', 'step': 2643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.780260', 'step': 2643, 'epoch': 2} {'type': 'loss', 'content': 0.02342245541512966, 'timestamp': '2025-09-30 23:04:56.786427', 'step': 2644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.844142', 'step': 2644, 'epoch': 2} {'type': 'loss', 'content': 0.008066447451710701, 'timestamp': '2025-09-30 23:04:56.854376', 'step': 2645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:56.918334', 'step': 2645, 'epoch': 2} {'type': 'loss', 'content': 0.0075772968120872974, 'timestamp': '2025-09-30 23:04:56.924881', 'step': 2646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.006606', 'step': 2646, 'epoch': 2} {'type': 'loss', 'content': 0.06469673663377762, 'timestamp': '2025-09-30 23:04:57.010526', 'step': 2647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.076673', 'step': 2647, 'epoch': 2} {'type': 'loss', 'content': 0.01646665669977665, 'timestamp': '2025-09-30 23:04:57.083675', 'step': 2648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:57.164245', 'step': 2648, 'epoch': 2} {'type': 'loss', 'content': 0.03124770149588585, 'timestamp': '2025-09-30 23:04:57.173832', 'step': 2649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.248089', 'step': 2649, 'epoch': 2} {'type': 'loss', 'content': 0.09305630624294281, 'timestamp': '2025-09-30 23:04:57.255606', 'step': 2650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:57.321486', 'step': 2650, 'epoch': 2} {'type': 'loss', 'content': 0.052718643099069595, 'timestamp': '2025-09-30 23:04:57.330272', 'step': 2651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.394734', 'step': 2651, 'epoch': 2} {'type': 'loss', 'content': 0.009448138996958733, 'timestamp': '2025-09-30 23:04:57.409362', 'step': 2652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.469602', 'step': 2652, 'epoch': 2} {'type': 'loss', 'content': 0.012288440950214863, 'timestamp': '2025-09-30 23:04:57.473249', 'step': 2653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.534636', 'step': 2653, 'epoch': 2} {'type': 'loss', 'content': 0.03149456903338432, 'timestamp': '2025-09-30 23:04:57.542878', 'step': 2654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.618362', 'step': 2654, 'epoch': 2} {'type': 'loss', 'content': 0.03832715377211571, 'timestamp': '2025-09-30 23:04:57.626836', 'step': 2655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.695395', 'step': 2655, 'epoch': 2} {'type': 'loss', 'content': 0.07656332105398178, 'timestamp': '2025-09-30 23:04:57.708551', 'step': 2656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:57.772284', 'step': 2656, 'epoch': 2} {'type': 'loss', 'content': 0.011780538596212864, 'timestamp': '2025-09-30 23:04:57.777480', 'step': 2657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.839890', 'step': 2657, 'epoch': 2} {'type': 'loss', 'content': 0.02508932538330555, 'timestamp': '2025-09-30 23:04:57.844540', 'step': 2658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.910609', 'step': 2658, 'epoch': 2} {'type': 'loss', 'content': 0.02607378177344799, 'timestamp': '2025-09-30 23:04:57.914320', 'step': 2659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:57.974827', 'step': 2659, 'epoch': 2} {'type': 'loss', 'content': 0.01857917010784149, 'timestamp': '2025-09-30 23:04:57.981359', 'step': 2660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:58.039963', 'step': 2660, 'epoch': 2} {'type': 'loss', 'content': 0.0698215439915657, 'timestamp': '2025-09-30 23:04:58.043654', 'step': 2661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.101618', 'step': 2661, 'epoch': 2} {'type': 'loss', 'content': 0.012067750096321106, 'timestamp': '2025-09-30 23:04:58.105735', 'step': 2662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:58.168370', 'step': 2662, 'epoch': 2} {'type': 'loss', 'content': 0.05612918362021446, 'timestamp': '2025-09-30 23:04:58.172511', 'step': 2663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.232199', 'step': 2663, 'epoch': 2} {'type': 'loss', 'content': 0.018040673807263374, 'timestamp': '2025-09-30 23:04:58.245949', 'step': 2664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:58.325016', 'step': 2664, 'epoch': 2} {'type': 'loss', 'content': 0.01463252492249012, 'timestamp': '2025-09-30 23:04:58.335747', 'step': 2665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.417058', 'step': 2665, 'epoch': 2} {'type': 'loss', 'content': 0.04249858856201172, 'timestamp': '2025-09-30 23:04:58.425409', 'step': 2666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.498640', 'step': 2666, 'epoch': 2} {'type': 'loss', 'content': 0.004769507795572281, 'timestamp': '2025-09-30 23:04:58.502752', 'step': 2667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:58.558147', 'step': 2667, 'epoch': 2} {'type': 'loss', 'content': 0.023133903741836548, 'timestamp': '2025-09-30 23:04:58.568089', 'step': 2668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.622625', 'step': 2668, 'epoch': 2} {'type': 'loss', 'content': 0.028691943734884262, 'timestamp': '2025-09-30 23:04:58.627536', 'step': 2669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.688898', 'step': 2669, 'epoch': 2} {'type': 'loss', 'content': 0.011824160814285278, 'timestamp': '2025-09-30 23:04:58.692061', 'step': 2670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:58.749475', 'step': 2670, 'epoch': 2} {'type': 'loss', 'content': 0.05110222101211548, 'timestamp': '2025-09-30 23:04:58.754503', 'step': 2671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.818377', 'step': 2671, 'epoch': 2} {'type': 'loss', 'content': 0.0038277101702988148, 'timestamp': '2025-09-30 23:04:58.824895', 'step': 2672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:58.884601', 'step': 2672, 'epoch': 2} {'type': 'loss', 'content': 0.012608318589627743, 'timestamp': '2025-09-30 23:04:58.887020', 'step': 2673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:58.941467', 'step': 2673, 'epoch': 2} {'type': 'loss', 'content': 0.015692254528403282, 'timestamp': '2025-09-30 23:04:58.944826', 'step': 2674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.001270', 'step': 2674, 'epoch': 2} {'type': 'loss', 'content': 0.03534774109721184, 'timestamp': '2025-09-30 23:04:59.010208', 'step': 2675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.081240', 'step': 2675, 'epoch': 2} {'type': 'loss', 'content': 0.009373578242957592, 'timestamp': '2025-09-30 23:04:59.088404', 'step': 2676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.151523', 'step': 2676, 'epoch': 2} {'type': 'loss', 'content': 0.004665933083742857, 'timestamp': '2025-09-30 23:04:59.154299', 'step': 2677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.215324', 'step': 2677, 'epoch': 2} {'type': 'loss', 'content': 0.02954176254570484, 'timestamp': '2025-09-30 23:04:59.221216', 'step': 2678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:59.283286', 'step': 2678, 'epoch': 2} {'type': 'loss', 'content': 0.02026541717350483, 'timestamp': '2025-09-30 23:04:59.286356', 'step': 2679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.344051', 'step': 2679, 'epoch': 2} {'type': 'loss', 'content': 0.020806176587939262, 'timestamp': '2025-09-30 23:04:59.351333', 'step': 2680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.409808', 'step': 2680, 'epoch': 2} {'type': 'loss', 'content': 0.038793016225099564, 'timestamp': '2025-09-30 23:04:59.412944', 'step': 2681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.475870', 'step': 2681, 'epoch': 2} {'type': 'loss', 'content': 0.02840593457221985, 'timestamp': '2025-09-30 23:04:59.478856', 'step': 2682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.549513', 'step': 2682, 'epoch': 2} {'type': 'loss', 'content': 0.03060522861778736, 'timestamp': '2025-09-30 23:04:59.554525', 'step': 2683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:59.611129', 'step': 2683, 'epoch': 2} {'type': 'loss', 'content': 0.04451015219092369, 'timestamp': '2025-09-30 23:04:59.618886', 'step': 2684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.675021', 'step': 2684, 'epoch': 2} {'type': 'loss', 'content': 0.03557295352220535, 'timestamp': '2025-09-30 23:04:59.678162', 'step': 2685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.734602', 'step': 2685, 'epoch': 2} {'type': 'loss', 'content': 0.015259339474141598, 'timestamp': '2025-09-30 23:04:59.740938', 'step': 2686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:04:59.811099', 'step': 2686, 'epoch': 2} {'type': 'loss', 'content': 0.02911846898496151, 'timestamp': '2025-09-30 23:04:59.819384', 'step': 2687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:04:59.888952', 'step': 2687, 'epoch': 2} {'type': 'loss', 'content': 0.042816292494535446, 'timestamp': '2025-09-30 23:04:59.899923', 'step': 2688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:04:59.959809', 'step': 2688, 'epoch': 2} {'type': 'loss', 'content': 0.06147071719169617, 'timestamp': '2025-09-30 23:04:59.965119', 'step': 2689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.024347', 'step': 2689, 'epoch': 2} {'type': 'loss', 'content': 0.0058371094055473804, 'timestamp': '2025-09-30 23:05:00.027295', 'step': 2690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.083199', 'step': 2690, 'epoch': 2} {'type': 'loss', 'content': 0.030228929594159126, 'timestamp': '2025-09-30 23:05:00.086887', 'step': 2691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.145821', 'step': 2691, 'epoch': 2} {'type': 'loss', 'content': 0.03507154807448387, 'timestamp': '2025-09-30 23:05:00.152375', 'step': 2692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:00.206546', 'step': 2692, 'epoch': 2} {'type': 'loss', 'content': 0.0142065966501832, 'timestamp': '2025-09-30 23:05:00.209655', 'step': 2693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.266526', 'step': 2693, 'epoch': 2} {'type': 'loss', 'content': 0.011375081725418568, 'timestamp': '2025-09-30 23:05:00.275842', 'step': 2694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.338532', 'step': 2694, 'epoch': 2} {'type': 'loss', 'content': 0.015636639669537544, 'timestamp': '2025-09-30 23:05:00.344207', 'step': 2695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.406957', 'step': 2695, 'epoch': 2} {'type': 'loss', 'content': 0.039857037365436554, 'timestamp': '2025-09-30 23:05:00.413896', 'step': 2696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.470937', 'step': 2696, 'epoch': 2} {'type': 'loss', 'content': 0.057773102074861526, 'timestamp': '2025-09-30 23:05:00.478064', 'step': 2697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.537246', 'step': 2697, 'epoch': 2} {'type': 'loss', 'content': 0.0190952830016613, 'timestamp': '2025-09-30 23:05:00.540494', 'step': 2698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.598021', 'step': 2698, 'epoch': 2} {'type': 'loss', 'content': 0.04285770282149315, 'timestamp': '2025-09-30 23:05:00.603278', 'step': 2699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.661673', 'step': 2699, 'epoch': 2} {'type': 'loss', 'content': 0.020988060161471367, 'timestamp': '2025-09-30 23:05:00.678306', 'step': 2700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.757790', 'step': 2700, 'epoch': 2} {'type': 'loss', 'content': 0.01768564246594906, 'timestamp': '2025-09-30 23:05:00.761011', 'step': 2701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:00.820735', 'step': 2701, 'epoch': 2} {'type': 'loss', 'content': 0.004466019570827484, 'timestamp': '2025-09-30 23:05:00.823591', 'step': 2702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.880646', 'step': 2702, 'epoch': 2} {'type': 'loss', 'content': 0.01900581456720829, 'timestamp': '2025-09-30 23:05:00.883010', 'step': 2703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:00.939266', 'step': 2703, 'epoch': 2} {'type': 'loss', 'content': 0.013263829983770847, 'timestamp': '2025-09-30 23:05:00.945931', 'step': 2704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.002652', 'step': 2704, 'epoch': 2} {'type': 'loss', 'content': 0.016548460349440575, 'timestamp': '2025-09-30 23:05:01.005602', 'step': 2705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.060999', 'step': 2705, 'epoch': 2} {'type': 'loss', 'content': 0.030307959765195847, 'timestamp': '2025-09-30 23:05:01.064294', 'step': 2706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.118629', 'step': 2706, 'epoch': 2} {'type': 'loss', 'content': 0.01416084822267294, 'timestamp': '2025-09-30 23:05:01.121552', 'step': 2707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.183320', 'step': 2707, 'epoch': 2} {'type': 'loss', 'content': 0.00548711558803916, 'timestamp': '2025-09-30 23:05:01.189729', 'step': 2708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:01.257275', 'step': 2708, 'epoch': 2} {'type': 'loss', 'content': 0.01228051632642746, 'timestamp': '2025-09-30 23:05:01.262565', 'step': 2709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.335182', 'step': 2709, 'epoch': 2} {'type': 'loss', 'content': 0.032637111842632294, 'timestamp': '2025-09-30 23:05:01.337991', 'step': 2710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.398028', 'step': 2710, 'epoch': 2} {'type': 'loss', 'content': 0.052083972841501236, 'timestamp': '2025-09-30 23:05:01.400848', 'step': 2711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:01.461356', 'step': 2711, 'epoch': 2} {'type': 'loss', 'content': 0.008047773502767086, 'timestamp': '2025-09-30 23:05:01.472567', 'step': 2712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:01.532151', 'step': 2712, 'epoch': 2} {'type': 'loss', 'content': 0.07280652970075607, 'timestamp': '2025-09-30 23:05:01.541726', 'step': 2713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.602163', 'step': 2713, 'epoch': 2} {'type': 'loss', 'content': 0.021811990067362785, 'timestamp': '2025-09-30 23:05:01.605073', 'step': 2714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.661192', 'step': 2714, 'epoch': 2} {'type': 'loss', 'content': 0.07360333949327469, 'timestamp': '2025-09-30 23:05:01.664940', 'step': 2715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:01.725855', 'step': 2715, 'epoch': 2} {'type': 'loss', 'content': 0.017844831570982933, 'timestamp': '2025-09-30 23:05:01.732093', 'step': 2716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.792955', 'step': 2716, 'epoch': 2} {'type': 'loss', 'content': 0.010997948236763477, 'timestamp': '2025-09-30 23:05:01.795602', 'step': 2717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.851722', 'step': 2717, 'epoch': 2} {'type': 'loss', 'content': 0.07001028954982758, 'timestamp': '2025-09-30 23:05:01.855210', 'step': 2718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:01.920265', 'step': 2718, 'epoch': 2} {'type': 'loss', 'content': 0.01877439022064209, 'timestamp': '2025-09-30 23:05:01.943595', 'step': 2719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.003197', 'step': 2719, 'epoch': 2} {'type': 'loss', 'content': 0.05145663768053055, 'timestamp': '2025-09-30 23:05:02.010417', 'step': 2720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:02.075986', 'step': 2720, 'epoch': 2} {'type': 'loss', 'content': 0.03276502341032028, 'timestamp': '2025-09-30 23:05:02.079418', 'step': 2721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.135093', 'step': 2721, 'epoch': 2} {'type': 'loss', 'content': 0.007687762845307589, 'timestamp': '2025-09-30 23:05:02.139212', 'step': 2722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.199704', 'step': 2722, 'epoch': 2} {'type': 'loss', 'content': 0.002442445605993271, 'timestamp': '2025-09-30 23:05:02.202982', 'step': 2723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.263481', 'step': 2723, 'epoch': 2} {'type': 'loss', 'content': 0.026576295495033264, 'timestamp': '2025-09-30 23:05:02.276372', 'step': 2724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.337791', 'step': 2724, 'epoch': 2} {'type': 'loss', 'content': 0.037630435079336166, 'timestamp': '2025-09-30 23:05:02.341430', 'step': 2725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.398551', 'step': 2725, 'epoch': 2} {'type': 'loss', 'content': 0.0524810254573822, 'timestamp': '2025-09-30 23:05:02.402729', 'step': 2726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.467357', 'step': 2726, 'epoch': 2} {'type': 'loss', 'content': 0.021158378571271896, 'timestamp': '2025-09-30 23:05:02.475300', 'step': 2727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.538378', 'step': 2727, 'epoch': 2} {'type': 'loss', 'content': 0.01678326539695263, 'timestamp': '2025-09-30 23:05:02.548713', 'step': 2728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.606011', 'step': 2728, 'epoch': 2} {'type': 'loss', 'content': 0.033713288605213165, 'timestamp': '2025-09-30 23:05:02.611583', 'step': 2729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.681343', 'step': 2729, 'epoch': 2} {'type': 'loss', 'content': 0.0659589022397995, 'timestamp': '2025-09-30 23:05:02.685821', 'step': 2730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:02.748698', 'step': 2730, 'epoch': 2} {'type': 'loss', 'content': 0.03020251728594303, 'timestamp': '2025-09-30 23:05:02.752040', 'step': 2731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.813338', 'step': 2731, 'epoch': 2} {'type': 'loss', 'content': 0.023467451333999634, 'timestamp': '2025-09-30 23:05:02.820226', 'step': 2732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.878334', 'step': 2732, 'epoch': 2} {'type': 'loss', 'content': 0.01563424989581108, 'timestamp': '2025-09-30 23:05:02.885743', 'step': 2733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:02.948949', 'step': 2733, 'epoch': 2} {'type': 'loss', 'content': 0.026061130687594414, 'timestamp': '2025-09-30 23:05:02.953373', 'step': 2734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:03.008872', 'step': 2734, 'epoch': 2} {'type': 'loss', 'content': 0.017374156042933464, 'timestamp': '2025-09-30 23:05:03.012267', 'step': 2735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:03.068685', 'step': 2735, 'epoch': 2} {'type': 'loss', 'content': 0.01377827674150467, 'timestamp': '2025-09-30 23:05:03.078978', 'step': 2736, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:05:07.481558', 'step': 2736, 'epoch': 2} {'type': 'pplx', 'content': 7998621.281942797, 'timestamp': '2025-09-30 23:05:07.488234', 'step': 2736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:07.548995', 'step': 2736, 'epoch': 2} {'type': 'loss', 'content': 0.007643009535968304, 'timestamp': '2025-09-30 23:05:07.554580', 'step': 2737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:07.612563', 'step': 2737, 'epoch': 2} {'type': 'loss', 'content': 0.021152349188923836, 'timestamp': '2025-09-30 23:05:07.617724', 'step': 2738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:07.681904', 'step': 2738, 'epoch': 2} {'type': 'loss', 'content': 0.028926750645041466, 'timestamp': '2025-09-30 23:05:07.690490', 'step': 2739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:07.758059', 'step': 2739, 'epoch': 2} {'type': 'loss', 'content': 0.010700655169785023, 'timestamp': '2025-09-30 23:05:07.767973', 'step': 2740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:07.834562', 'step': 2740, 'epoch': 2} {'type': 'loss', 'content': 0.04410426691174507, 'timestamp': '2025-09-30 23:05:07.844102', 'step': 2741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:07.905890', 'step': 2741, 'epoch': 2} {'type': 'loss', 'content': 0.0590062141418457, 'timestamp': '2025-09-30 23:05:07.911530', 'step': 2742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:07.971969', 'step': 2742, 'epoch': 2} {'type': 'loss', 'content': 0.0067772273905575275, 'timestamp': '2025-09-30 23:05:07.974743', 'step': 2743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.038700', 'step': 2743, 'epoch': 2} {'type': 'loss', 'content': 0.009907214902341366, 'timestamp': '2025-09-30 23:05:08.046734', 'step': 2744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:08.109236', 'step': 2744, 'epoch': 2} {'type': 'loss', 'content': 0.01189845334738493, 'timestamp': '2025-09-30 23:05:08.113593', 'step': 2745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.174854', 'step': 2745, 'epoch': 2} {'type': 'loss', 'content': 0.02412308380007744, 'timestamp': '2025-09-30 23:05:08.177925', 'step': 2746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:08.236547', 'step': 2746, 'epoch': 2} {'type': 'loss', 'content': 0.040016114711761475, 'timestamp': '2025-09-30 23:05:08.239934', 'step': 2747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.301997', 'step': 2747, 'epoch': 2} {'type': 'loss', 'content': 0.015506272204220295, 'timestamp': '2025-09-30 23:05:08.307684', 'step': 2748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.365510', 'step': 2748, 'epoch': 2} {'type': 'loss', 'content': 0.04252577945590019, 'timestamp': '2025-09-30 23:05:08.368908', 'step': 2749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:08.427704', 'step': 2749, 'epoch': 2} {'type': 'loss', 'content': 0.007695195265114307, 'timestamp': '2025-09-30 23:05:08.436395', 'step': 2750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.508053', 'step': 2750, 'epoch': 2} {'type': 'loss', 'content': 0.020126687362790108, 'timestamp': '2025-09-30 23:05:08.511599', 'step': 2751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.573191', 'step': 2751, 'epoch': 2} {'type': 'loss', 'content': 0.017023108899593353, 'timestamp': '2025-09-30 23:05:08.580328', 'step': 2752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:08.635675', 'step': 2752, 'epoch': 2} {'type': 'loss', 'content': 0.014412411488592625, 'timestamp': '2025-09-30 23:05:08.640741', 'step': 2753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.703054', 'step': 2753, 'epoch': 2} {'type': 'loss', 'content': 0.04213390126824379, 'timestamp': '2025-09-30 23:05:08.705959', 'step': 2754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.760224', 'step': 2754, 'epoch': 2} {'type': 'loss', 'content': 0.018684914335608482, 'timestamp': '2025-09-30 23:05:08.768141', 'step': 2755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.825573', 'step': 2755, 'epoch': 2} {'type': 'loss', 'content': 0.013772299513220787, 'timestamp': '2025-09-30 23:05:08.834936', 'step': 2756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:08.895029', 'step': 2756, 'epoch': 2} {'type': 'loss', 'content': 0.013789281249046326, 'timestamp': '2025-09-30 23:05:08.899057', 'step': 2757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:08.957248', 'step': 2757, 'epoch': 2} {'type': 'loss', 'content': 0.012734184972941875, 'timestamp': '2025-09-30 23:05:08.960308', 'step': 2758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:09.018818', 'step': 2758, 'epoch': 2} {'type': 'loss', 'content': 0.0035284983459860086, 'timestamp': '2025-09-30 23:05:09.026814', 'step': 2759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.088630', 'step': 2759, 'epoch': 2} {'type': 'loss', 'content': 0.0024368001613765955, 'timestamp': '2025-09-30 23:05:09.095754', 'step': 2760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:09.157547', 'step': 2760, 'epoch': 2} {'type': 'loss', 'content': 0.034709520637989044, 'timestamp': '2025-09-30 23:05:09.161005', 'step': 2761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.222114', 'step': 2761, 'epoch': 2} {'type': 'loss', 'content': 0.059273041784763336, 'timestamp': '2025-09-30 23:05:09.229600', 'step': 2762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.291735', 'step': 2762, 'epoch': 2} {'type': 'loss', 'content': 0.012790483422577381, 'timestamp': '2025-09-30 23:05:09.298272', 'step': 2763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.365975', 'step': 2763, 'epoch': 2} {'type': 'loss', 'content': 0.006520257331430912, 'timestamp': '2025-09-30 23:05:09.377282', 'step': 2764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.454345', 'step': 2764, 'epoch': 2} {'type': 'loss', 'content': 0.0572698637843132, 'timestamp': '2025-09-30 23:05:09.458501', 'step': 2765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.517074', 'step': 2765, 'epoch': 2} {'type': 'loss', 'content': 0.03898325189948082, 'timestamp': '2025-09-30 23:05:09.520911', 'step': 2766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.587701', 'step': 2766, 'epoch': 2} {'type': 'loss', 'content': 0.008947591297328472, 'timestamp': '2025-09-30 23:05:09.591869', 'step': 2767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:09.658836', 'step': 2767, 'epoch': 2} {'type': 'loss', 'content': 0.021723952144384384, 'timestamp': '2025-09-30 23:05:09.670349', 'step': 2768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.734081', 'step': 2768, 'epoch': 2} {'type': 'loss', 'content': 0.002910384675487876, 'timestamp': '2025-09-30 23:05:09.742984', 'step': 2769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.815358', 'step': 2769, 'epoch': 2} {'type': 'loss', 'content': 0.04110255464911461, 'timestamp': '2025-09-30 23:05:09.819101', 'step': 2770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.882618', 'step': 2770, 'epoch': 2} {'type': 'loss', 'content': 0.06053806468844414, 'timestamp': '2025-09-30 23:05:09.888274', 'step': 2771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:09.948600', 'step': 2771, 'epoch': 2} {'type': 'loss', 'content': 0.02902268059551716, 'timestamp': '2025-09-30 23:05:09.954625', 'step': 2772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.017598', 'step': 2772, 'epoch': 2} {'type': 'loss', 'content': 0.055856358259916306, 'timestamp': '2025-09-30 23:05:10.021040', 'step': 2773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.089992', 'step': 2773, 'epoch': 2} {'type': 'loss', 'content': 0.049139466136693954, 'timestamp': '2025-09-30 23:05:10.100184', 'step': 2774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.175014', 'step': 2774, 'epoch': 2} {'type': 'loss', 'content': 0.025098862126469612, 'timestamp': '2025-09-30 23:05:10.180019', 'step': 2775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:10.241359', 'step': 2775, 'epoch': 2} {'type': 'loss', 'content': 0.007071125786751509, 'timestamp': '2025-09-30 23:05:10.248039', 'step': 2776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.311988', 'step': 2776, 'epoch': 2} {'type': 'loss', 'content': 0.0005493819480761886, 'timestamp': '2025-09-30 23:05:10.317551', 'step': 2777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.381119', 'step': 2777, 'epoch': 2} {'type': 'loss', 'content': 0.01608915440738201, 'timestamp': '2025-09-30 23:05:10.385374', 'step': 2778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.444295', 'step': 2778, 'epoch': 2} {'type': 'loss', 'content': 0.0224476158618927, 'timestamp': '2025-09-30 23:05:10.448857', 'step': 2779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.507542', 'step': 2779, 'epoch': 2} {'type': 'loss', 'content': 0.023309389129281044, 'timestamp': '2025-09-30 23:05:10.515958', 'step': 2780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:10.575077', 'step': 2780, 'epoch': 2} {'type': 'loss', 'content': 0.005086394492536783, 'timestamp': '2025-09-30 23:05:10.579346', 'step': 2781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.636908', 'step': 2781, 'epoch': 2} {'type': 'loss', 'content': 0.02428785338997841, 'timestamp': '2025-09-30 23:05:10.640485', 'step': 2782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.696381', 'step': 2782, 'epoch': 2} {'type': 'loss', 'content': 0.015183540992438793, 'timestamp': '2025-09-30 23:05:10.699251', 'step': 2783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.765373', 'step': 2783, 'epoch': 2} {'type': 'loss', 'content': 0.0359315387904644, 'timestamp': '2025-09-30 23:05:10.778142', 'step': 2784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.845251', 'step': 2784, 'epoch': 2} {'type': 'loss', 'content': 0.005914172623306513, 'timestamp': '2025-09-30 23:05:10.860463', 'step': 2785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:10.973080', 'step': 2785, 'epoch': 2} {'type': 'loss', 'content': 0.03776617720723152, 'timestamp': '2025-09-30 23:05:10.994152', 'step': 2786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.080656', 'step': 2786, 'epoch': 2} {'type': 'loss', 'content': 0.03518933802843094, 'timestamp': '2025-09-30 23:05:11.090191', 'step': 2787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.173562', 'step': 2787, 'epoch': 2} {'type': 'loss', 'content': 0.020077379420399666, 'timestamp': '2025-09-30 23:05:11.181194', 'step': 2788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.249053', 'step': 2788, 'epoch': 2} {'type': 'loss', 'content': 0.03434090316295624, 'timestamp': '2025-09-30 23:05:11.252471', 'step': 2789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.312351', 'step': 2789, 'epoch': 2} {'type': 'loss', 'content': 0.018457189202308655, 'timestamp': '2025-09-30 23:05:11.320492', 'step': 2790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:11.395673', 'step': 2790, 'epoch': 2} {'type': 'loss', 'content': 0.018717357888817787, 'timestamp': '2025-09-30 23:05:11.401249', 'step': 2791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.471018', 'step': 2791, 'epoch': 2} {'type': 'loss', 'content': 0.04471837729215622, 'timestamp': '2025-09-30 23:05:11.477833', 'step': 2792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.550293', 'step': 2792, 'epoch': 2} {'type': 'loss', 'content': 0.021965859457850456, 'timestamp': '2025-09-30 23:05:11.560941', 'step': 2793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.635865', 'step': 2793, 'epoch': 2} {'type': 'loss', 'content': 0.012825971469283104, 'timestamp': '2025-09-30 23:05:11.645540', 'step': 2794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.720356', 'step': 2794, 'epoch': 2} {'type': 'loss', 'content': 0.014128429815173149, 'timestamp': '2025-09-30 23:05:11.725620', 'step': 2795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.808127', 'step': 2795, 'epoch': 2} {'type': 'loss', 'content': 0.02322007156908512, 'timestamp': '2025-09-30 23:05:11.815726', 'step': 2796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.895541', 'step': 2796, 'epoch': 2} {'type': 'loss', 'content': 0.003708257805556059, 'timestamp': '2025-09-30 23:05:11.898982', 'step': 2797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:11.968613', 'step': 2797, 'epoch': 2} {'type': 'loss', 'content': 0.026796985417604446, 'timestamp': '2025-09-30 23:05:11.977093', 'step': 2798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:12.051373', 'step': 2798, 'epoch': 2} {'type': 'loss', 'content': 0.010228311643004417, 'timestamp': '2025-09-30 23:05:12.060629', 'step': 2799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.122481', 'step': 2799, 'epoch': 2} {'type': 'loss', 'content': 0.012666887603700161, 'timestamp': '2025-09-30 23:05:12.130503', 'step': 2800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.195081', 'step': 2800, 'epoch': 2} {'type': 'loss', 'content': 0.02242611162364483, 'timestamp': '2025-09-30 23:05:12.205312', 'step': 2801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.280755', 'step': 2801, 'epoch': 2} {'type': 'loss', 'content': 0.031788211315870285, 'timestamp': '2025-09-30 23:05:12.288897', 'step': 2802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.366915', 'step': 2802, 'epoch': 2} {'type': 'loss', 'content': 0.00908071082085371, 'timestamp': '2025-09-30 23:05:12.376301', 'step': 2803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.452210', 'step': 2803, 'epoch': 2} {'type': 'loss', 'content': 0.0059288558550179005, 'timestamp': '2025-09-30 23:05:12.466953', 'step': 2804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:12.535496', 'step': 2804, 'epoch': 2} {'type': 'loss', 'content': 0.01727209985256195, 'timestamp': '2025-09-30 23:05:12.539676', 'step': 2805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.607526', 'step': 2805, 'epoch': 2} {'type': 'loss', 'content': 0.03590935841202736, 'timestamp': '2025-09-30 23:05:12.611080', 'step': 2806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.671405', 'step': 2806, 'epoch': 2} {'type': 'loss', 'content': 0.03455566614866257, 'timestamp': '2025-09-30 23:05:12.685905', 'step': 2807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.779864', 'step': 2807, 'epoch': 2} {'type': 'loss', 'content': 0.010828351601958275, 'timestamp': '2025-09-30 23:05:12.796982', 'step': 2808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.886463', 'step': 2808, 'epoch': 2} {'type': 'loss', 'content': 0.0016058151377364993, 'timestamp': '2025-09-30 23:05:12.896000', 'step': 2809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:12.975672', 'step': 2809, 'epoch': 2} {'type': 'loss', 'content': 0.012550147250294685, 'timestamp': '2025-09-30 23:05:12.981855', 'step': 2810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.051277', 'step': 2810, 'epoch': 2} {'type': 'loss', 'content': 0.003417808562517166, 'timestamp': '2025-09-30 23:05:13.062574', 'step': 2811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.126484', 'step': 2811, 'epoch': 2} {'type': 'loss', 'content': 0.01270789373666048, 'timestamp': '2025-09-30 23:05:13.140696', 'step': 2812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.220603', 'step': 2812, 'epoch': 2} {'type': 'loss', 'content': 0.0017874229233711958, 'timestamp': '2025-09-30 23:05:13.233004', 'step': 2813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.298861', 'step': 2813, 'epoch': 2} {'type': 'loss', 'content': 0.010661625303328037, 'timestamp': '2025-09-30 23:05:13.303695', 'step': 2814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.383672', 'step': 2814, 'epoch': 2} {'type': 'loss', 'content': 0.024687038734555244, 'timestamp': '2025-09-30 23:05:13.395455', 'step': 2815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.486157', 'step': 2815, 'epoch': 2} {'type': 'loss', 'content': 0.030481066554784775, 'timestamp': '2025-09-30 23:05:13.501632', 'step': 2816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.577546', 'step': 2816, 'epoch': 2} {'type': 'loss', 'content': 0.004611384589225054, 'timestamp': '2025-09-30 23:05:13.589197', 'step': 2817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.677987', 'step': 2817, 'epoch': 2} {'type': 'loss', 'content': 0.024697406217455864, 'timestamp': '2025-09-30 23:05:13.690330', 'step': 2818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.780829', 'step': 2818, 'epoch': 2} {'type': 'loss', 'content': 0.01620721071958542, 'timestamp': '2025-09-30 23:05:13.793539', 'step': 2819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:13.884130', 'step': 2819, 'epoch': 2} {'type': 'loss', 'content': 0.0052399099804461, 'timestamp': '2025-09-30 23:05:13.905431', 'step': 2820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:14.002241', 'step': 2820, 'epoch': 2} {'type': 'loss', 'content': 0.044309165328741074, 'timestamp': '2025-09-30 23:05:14.018339', 'step': 2821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:14.115402', 'step': 2821, 'epoch': 2} {'type': 'loss', 'content': 0.00798728782683611, 'timestamp': '2025-09-30 23:05:14.120494', 'step': 2822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:14.207792', 'step': 2822, 'epoch': 2} {'type': 'loss', 'content': 0.03296009451150894, 'timestamp': '2025-09-30 23:05:14.213489', 'step': 2823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:14.307539', 'step': 2823, 'epoch': 2} {'type': 'loss', 'content': 0.05011915788054466, 'timestamp': '2025-09-30 23:05:14.315730', 'step': 2824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:14.379638', 'step': 2824, 'epoch': 2} {'type': 'loss', 'content': 0.022591661661863327, 'timestamp': '2025-09-30 23:05:14.396947', 'step': 2825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:14.467513', 'step': 2825, 'epoch': 2} {'type': 'loss', 'content': 0.040130365639925, 'timestamp': '2025-09-30 23:05:14.472703', 'step': 2826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:14.551834', 'step': 2826, 'epoch': 2} {'type': 'loss', 'content': 0.11589137464761734, 'timestamp': '2025-09-30 23:05:14.557225', 'step': 2827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:14.619608', 'step': 2827, 'epoch': 2} {'type': 'loss', 'content': 0.023112019523978233, 'timestamp': '2025-09-30 23:05:14.634885', 'step': 2828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:14.700729', 'step': 2828, 'epoch': 2} {'type': 'loss', 'content': 0.013126117177307606, 'timestamp': '2025-09-30 23:05:14.704594', 'step': 2829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:14.773419', 'step': 2829, 'epoch': 2} {'type': 'loss', 'content': 0.0024939384311437607, 'timestamp': '2025-09-30 23:05:14.777461', 'step': 2830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:14.843024', 'step': 2830, 'epoch': 2} {'type': 'loss', 'content': 0.028416991233825684, 'timestamp': '2025-09-30 23:05:14.853316', 'step': 2831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:14.927284', 'step': 2831, 'epoch': 2} {'type': 'loss', 'content': 0.004695421550422907, 'timestamp': '2025-09-30 23:05:14.943937', 'step': 2832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.020587', 'step': 2832, 'epoch': 2} {'type': 'loss', 'content': 0.0329706110060215, 'timestamp': '2025-09-30 23:05:15.032637', 'step': 2833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.103089', 'step': 2833, 'epoch': 2} {'type': 'loss', 'content': 0.011061002500355244, 'timestamp': '2025-09-30 23:05:15.108117', 'step': 2834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.172800', 'step': 2834, 'epoch': 2} {'type': 'loss', 'content': 0.07239875942468643, 'timestamp': '2025-09-30 23:05:15.176596', 'step': 2835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.246818', 'step': 2835, 'epoch': 2} {'type': 'loss', 'content': 0.10527931898832321, 'timestamp': '2025-09-30 23:05:15.259721', 'step': 2836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.329899', 'step': 2836, 'epoch': 2} {'type': 'loss', 'content': 0.010454251430928707, 'timestamp': '2025-09-30 23:05:15.339820', 'step': 2837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.421022', 'step': 2837, 'epoch': 2} {'type': 'loss', 'content': 0.019551919773221016, 'timestamp': '2025-09-30 23:05:15.430621', 'step': 2838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.508005', 'step': 2838, 'epoch': 2} {'type': 'loss', 'content': 0.030193541198968887, 'timestamp': '2025-09-30 23:05:15.513520', 'step': 2839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.593203', 'step': 2839, 'epoch': 2} {'type': 'loss', 'content': 0.03285510465502739, 'timestamp': '2025-09-30 23:05:15.610062', 'step': 2840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:05:15.684653', 'step': 2840, 'epoch': 2} {'type': 'loss', 'content': 0.02598411962389946, 'timestamp': '2025-09-30 23:05:15.698448', 'step': 2841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.785135', 'step': 2841, 'epoch': 2} {'type': 'loss', 'content': 0.0020946357399225235, 'timestamp': '2025-09-30 23:05:15.798594', 'step': 2842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.890067', 'step': 2842, 'epoch': 2} {'type': 'loss', 'content': 0.02241087332367897, 'timestamp': '2025-09-30 23:05:15.903484', 'step': 2843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:15.984502', 'step': 2843, 'epoch': 2} {'type': 'loss', 'content': 0.01563403010368347, 'timestamp': '2025-09-30 23:05:15.991847', 'step': 2844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:16.076400', 'step': 2844, 'epoch': 2} {'type': 'loss', 'content': 0.012195399031043053, 'timestamp': '2025-09-30 23:05:16.089183', 'step': 2845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:16.155600', 'step': 2845, 'epoch': 2} {'type': 'loss', 'content': 0.03627203032374382, 'timestamp': '2025-09-30 23:05:16.167756', 'step': 2846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:16.262089', 'step': 2846, 'epoch': 2} {'type': 'loss', 'content': 0.0555683858692646, 'timestamp': '2025-09-30 23:05:16.281430', 'step': 2847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:16.381015', 'step': 2847, 'epoch': 2} {'type': 'loss', 'content': 0.008491910062730312, 'timestamp': '2025-09-30 23:05:16.394888', 'step': 2848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:16.485362', 'step': 2848, 'epoch': 2} {'type': 'loss', 'content': 0.011070271953940392, 'timestamp': '2025-09-30 23:05:16.496804', 'step': 2849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:16.577418', 'step': 2849, 'epoch': 2} {'type': 'loss', 'content': 0.016466723755002022, 'timestamp': '2025-09-30 23:05:16.590102', 'step': 2850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:16.670945', 'step': 2850, 'epoch': 2} {'type': 'loss', 'content': 0.01724032685160637, 'timestamp': '2025-09-30 23:05:16.681714', 'step': 2851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:16.764126', 'step': 2851, 'epoch': 2} {'type': 'loss', 'content': 0.012006739154458046, 'timestamp': '2025-09-30 23:05:16.781410', 'step': 2852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:16.846405', 'step': 2852, 'epoch': 2} {'type': 'loss', 'content': 0.03738158568739891, 'timestamp': '2025-09-30 23:05:16.856964', 'step': 2853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:16.935399', 'step': 2853, 'epoch': 2} {'type': 'loss', 'content': 0.03628024458885193, 'timestamp': '2025-09-30 23:05:16.948151', 'step': 2854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.003569', 'step': 2854, 'epoch': 2} {'type': 'loss', 'content': 0.013947752304375172, 'timestamp': '2025-09-30 23:05:17.014287', 'step': 2855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.091638', 'step': 2855, 'epoch': 2} {'type': 'loss', 'content': 0.06021096929907799, 'timestamp': '2025-09-30 23:05:17.107327', 'step': 2856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:17.190957', 'step': 2856, 'epoch': 2} {'type': 'loss', 'content': 0.004537122789770365, 'timestamp': '2025-09-30 23:05:17.204627', 'step': 2857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:17.289748', 'step': 2857, 'epoch': 2} {'type': 'loss', 'content': 0.027024727314710617, 'timestamp': '2025-09-30 23:05:17.301261', 'step': 2858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.390822', 'step': 2858, 'epoch': 2} {'type': 'loss', 'content': 0.02708411030471325, 'timestamp': '2025-09-30 23:05:17.401419', 'step': 2859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.483145', 'step': 2859, 'epoch': 2} {'type': 'loss', 'content': 0.044647663831710815, 'timestamp': '2025-09-30 23:05:17.489775', 'step': 2860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.570474', 'step': 2860, 'epoch': 2} {'type': 'loss', 'content': 0.011015361174941063, 'timestamp': '2025-09-30 23:05:17.578924', 'step': 2861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.655281', 'step': 2861, 'epoch': 2} {'type': 'loss', 'content': 0.03713269904255867, 'timestamp': '2025-09-30 23:05:17.668524', 'step': 2862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:17.748088', 'step': 2862, 'epoch': 2} {'type': 'loss', 'content': 0.02445868030190468, 'timestamp': '2025-09-30 23:05:17.762228', 'step': 2863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.839298', 'step': 2863, 'epoch': 2} {'type': 'loss', 'content': 0.013084126636385918, 'timestamp': '2025-09-30 23:05:17.851448', 'step': 2864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:17.928652', 'step': 2864, 'epoch': 2} {'type': 'loss', 'content': 0.007721780799329281, 'timestamp': '2025-09-30 23:05:17.937408', 'step': 2865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.017229', 'step': 2865, 'epoch': 2} {'type': 'loss', 'content': 0.02651233971118927, 'timestamp': '2025-09-30 23:05:18.020729', 'step': 2866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.091274', 'step': 2866, 'epoch': 2} {'type': 'loss', 'content': 0.003134528873488307, 'timestamp': '2025-09-30 23:05:18.095301', 'step': 2867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.181793', 'step': 2867, 'epoch': 2} {'type': 'loss', 'content': 0.00357429264113307, 'timestamp': '2025-09-30 23:05:18.195339', 'step': 2868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.275716', 'step': 2868, 'epoch': 2} {'type': 'loss', 'content': 0.048905063420534134, 'timestamp': '2025-09-30 23:05:18.285875', 'step': 2869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.369890', 'step': 2869, 'epoch': 2} {'type': 'loss', 'content': 0.036143988370895386, 'timestamp': '2025-09-30 23:05:18.381720', 'step': 2870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:18.458517', 'step': 2870, 'epoch': 2} {'type': 'loss', 'content': 0.015725841745734215, 'timestamp': '2025-09-30 23:05:18.462068', 'step': 2871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.537192', 'step': 2871, 'epoch': 2} {'type': 'loss', 'content': 0.012214434333145618, 'timestamp': '2025-09-30 23:05:18.550448', 'step': 2872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.623864', 'step': 2872, 'epoch': 2} {'type': 'loss', 'content': 0.026979634538292885, 'timestamp': '2025-09-30 23:05:18.631064', 'step': 2873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.698659', 'step': 2873, 'epoch': 2} {'type': 'loss', 'content': 0.025963781401515007, 'timestamp': '2025-09-30 23:05:18.706532', 'step': 2874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.776624', 'step': 2874, 'epoch': 2} {'type': 'loss', 'content': 0.023804010823369026, 'timestamp': '2025-09-30 23:05:18.782964', 'step': 2875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.853519', 'step': 2875, 'epoch': 2} {'type': 'loss', 'content': 0.010200722143054008, 'timestamp': '2025-09-30 23:05:18.864381', 'step': 2876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:18.934666', 'step': 2876, 'epoch': 2} {'type': 'loss', 'content': 0.05217448249459267, 'timestamp': '2025-09-30 23:05:18.946459', 'step': 2877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.016012', 'step': 2877, 'epoch': 2} {'type': 'loss', 'content': 0.01292093563824892, 'timestamp': '2025-09-30 23:05:19.020896', 'step': 2878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.083367', 'step': 2878, 'epoch': 2} {'type': 'loss', 'content': 0.02150929532945156, 'timestamp': '2025-09-30 23:05:19.095329', 'step': 2879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:19.168363', 'step': 2879, 'epoch': 2} {'type': 'loss', 'content': 0.032463639974594116, 'timestamp': '2025-09-30 23:05:19.188475', 'step': 2880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.255446', 'step': 2880, 'epoch': 2} {'type': 'loss', 'content': 0.027787700295448303, 'timestamp': '2025-09-30 23:05:19.263135', 'step': 2881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:19.334277', 'step': 2881, 'epoch': 2} {'type': 'loss', 'content': 0.05016845092177391, 'timestamp': '2025-09-30 23:05:19.350999', 'step': 2882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.422678', 'step': 2882, 'epoch': 2} {'type': 'loss', 'content': 0.005855295807123184, 'timestamp': '2025-09-30 23:05:19.426213', 'step': 2883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.502721', 'step': 2883, 'epoch': 2} {'type': 'loss', 'content': 0.05044319108128548, 'timestamp': '2025-09-30 23:05:19.524427', 'step': 2884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:19.613298', 'step': 2884, 'epoch': 2} {'type': 'loss', 'content': 0.012174831703305244, 'timestamp': '2025-09-30 23:05:19.624965', 'step': 2885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.689980', 'step': 2885, 'epoch': 2} {'type': 'loss', 'content': 0.010662366636097431, 'timestamp': '2025-09-30 23:05:19.697872', 'step': 2886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.760687', 'step': 2886, 'epoch': 2} {'type': 'loss', 'content': 0.0066759041510522366, 'timestamp': '2025-09-30 23:05:19.765487', 'step': 2887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:19.828549', 'step': 2887, 'epoch': 2} {'type': 'loss', 'content': 0.03707655146718025, 'timestamp': '2025-09-30 23:05:19.839784', 'step': 2888, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:05:25.497351', 'step': 2888, 'epoch': 2} {'type': 'pplx', 'content': 8094372.823234448, 'timestamp': '2025-09-30 23:05:25.509928', 'step': 2888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:25.574778', 'step': 2888, 'epoch': 2} {'type': 'loss', 'content': 0.03677240386605263, 'timestamp': '2025-09-30 23:05:25.583445', 'step': 2889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:25.664108', 'step': 2889, 'epoch': 2} {'type': 'loss', 'content': 0.013182049617171288, 'timestamp': '2025-09-30 23:05:25.674542', 'step': 2890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:25.749903', 'step': 2890, 'epoch': 2} {'type': 'loss', 'content': 0.01710132509469986, 'timestamp': '2025-09-30 23:05:25.756116', 'step': 2891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:25.826217', 'step': 2891, 'epoch': 2} {'type': 'loss', 'content': 0.07326041162014008, 'timestamp': '2025-09-30 23:05:25.843641', 'step': 2892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:25.925202', 'step': 2892, 'epoch': 2} {'type': 'loss', 'content': 0.009039844386279583, 'timestamp': '2025-09-30 23:05:25.932003', 'step': 2893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.003040', 'step': 2893, 'epoch': 2} {'type': 'loss', 'content': 0.012862849049270153, 'timestamp': '2025-09-30 23:05:26.010886', 'step': 2894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:26.096930', 'step': 2894, 'epoch': 2} {'type': 'loss', 'content': 0.018744587898254395, 'timestamp': '2025-09-30 23:05:26.100393', 'step': 2895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.192676', 'step': 2895, 'epoch': 2} {'type': 'loss', 'content': 0.022118987515568733, 'timestamp': '2025-09-30 23:05:26.205538', 'step': 2896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.286356', 'step': 2896, 'epoch': 2} {'type': 'loss', 'content': 0.015247358940541744, 'timestamp': '2025-09-30 23:05:26.294960', 'step': 2897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.370381', 'step': 2897, 'epoch': 2} {'type': 'loss', 'content': 0.018315086141228676, 'timestamp': '2025-09-30 23:05:26.382907', 'step': 2898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.458708', 'step': 2898, 'epoch': 2} {'type': 'loss', 'content': 0.01789654605090618, 'timestamp': '2025-09-30 23:05:26.462789', 'step': 2899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.533792', 'step': 2899, 'epoch': 2} {'type': 'loss', 'content': 0.011552369222044945, 'timestamp': '2025-09-30 23:05:26.546837', 'step': 2900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.629417', 'step': 2900, 'epoch': 2} {'type': 'loss', 'content': 0.04082349315285683, 'timestamp': '2025-09-30 23:05:26.632842', 'step': 2901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.702888', 'step': 2901, 'epoch': 2} {'type': 'loss', 'content': 0.041492484509944916, 'timestamp': '2025-09-30 23:05:26.713634', 'step': 2902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.786283', 'step': 2902, 'epoch': 2} {'type': 'loss', 'content': 0.05283446982502937, 'timestamp': '2025-09-30 23:05:26.798096', 'step': 2903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:26.874830', 'step': 2903, 'epoch': 2} {'type': 'loss', 'content': 0.036473535001277924, 'timestamp': '2025-09-30 23:05:26.888549', 'step': 2904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:26.973145', 'step': 2904, 'epoch': 2} {'type': 'loss', 'content': 0.009920150972902775, 'timestamp': '2025-09-30 23:05:26.983614', 'step': 2905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.055609', 'step': 2905, 'epoch': 2} {'type': 'loss', 'content': 0.013133547268807888, 'timestamp': '2025-09-30 23:05:27.063424', 'step': 2906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.134947', 'step': 2906, 'epoch': 2} {'type': 'loss', 'content': 0.02730463817715645, 'timestamp': '2025-09-30 23:05:27.141500', 'step': 2907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.224784', 'step': 2907, 'epoch': 2} {'type': 'loss', 'content': 0.030869409441947937, 'timestamp': '2025-09-30 23:05:27.232824', 'step': 2908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.302854', 'step': 2908, 'epoch': 2} {'type': 'loss', 'content': 0.013432594947516918, 'timestamp': '2025-09-30 23:05:27.312419', 'step': 2909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.391231', 'step': 2909, 'epoch': 2} {'type': 'loss', 'content': 0.05776163563132286, 'timestamp': '2025-09-30 23:05:27.402436', 'step': 2910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.493265', 'step': 2910, 'epoch': 2} {'type': 'loss', 'content': 0.016445111483335495, 'timestamp': '2025-09-30 23:05:27.505699', 'step': 2911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:27.602107', 'step': 2911, 'epoch': 2} {'type': 'loss', 'content': 0.028878185898065567, 'timestamp': '2025-09-30 23:05:27.610918', 'step': 2912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.696294', 'step': 2912, 'epoch': 2} {'type': 'loss', 'content': 0.0367799773812294, 'timestamp': '2025-09-30 23:05:27.700661', 'step': 2913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:27.767629', 'step': 2913, 'epoch': 2} {'type': 'loss', 'content': 0.03689088672399521, 'timestamp': '2025-09-30 23:05:27.778172', 'step': 2914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:27.861735', 'step': 2914, 'epoch': 2} {'type': 'loss', 'content': 0.024414388462901115, 'timestamp': '2025-09-30 23:05:27.871310', 'step': 2915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:27.949996', 'step': 2915, 'epoch': 2} {'type': 'loss', 'content': 0.05595361068844795, 'timestamp': '2025-09-30 23:05:27.964907', 'step': 2916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:28.041728', 'step': 2916, 'epoch': 2} {'type': 'loss', 'content': 0.01913231983780861, 'timestamp': '2025-09-30 23:05:28.052335', 'step': 2917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:28.138077', 'step': 2917, 'epoch': 2} {'type': 'loss', 'content': 0.0337190106511116, 'timestamp': '2025-09-30 23:05:28.146731', 'step': 2918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:28.221633', 'step': 2918, 'epoch': 2} {'type': 'loss', 'content': 0.019869180396199226, 'timestamp': '2025-09-30 23:05:28.230855', 'step': 2919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:28.321891', 'step': 2919, 'epoch': 2} {'type': 'loss', 'content': 0.019437333568930626, 'timestamp': '2025-09-30 23:05:28.331794', 'step': 2920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:28.405509', 'step': 2920, 'epoch': 2} {'type': 'loss', 'content': 0.01644965261220932, 'timestamp': '2025-09-30 23:05:28.410896', 'step': 2921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:28.468208', 'step': 2921, 'epoch': 2} {'type': 'loss', 'content': 0.0731881782412529, 'timestamp': '2025-09-30 23:05:28.478512', 'step': 2922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:28.558316', 'step': 2922, 'epoch': 2} {'type': 'loss', 'content': 0.01810661144554615, 'timestamp': '2025-09-30 23:05:28.562567', 'step': 2923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:28.640135', 'step': 2923, 'epoch': 2} {'type': 'loss', 'content': 0.008589093573391438, 'timestamp': '2025-09-30 23:05:28.656018', 'step': 2924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:28.740182', 'step': 2924, 'epoch': 2} {'type': 'loss', 'content': 0.04549485072493553, 'timestamp': '2025-09-30 23:05:28.749246', 'step': 2925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:28.832721', 'step': 2925, 'epoch': 2} {'type': 'loss', 'content': 0.0054592713713645935, 'timestamp': '2025-09-30 23:05:28.835345', 'step': 2926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:28.918982', 'step': 2926, 'epoch': 2} {'type': 'loss', 'content': 0.028082478791475296, 'timestamp': '2025-09-30 23:05:28.926772', 'step': 2927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:28.998013', 'step': 2927, 'epoch': 2} {'type': 'loss', 'content': 0.0164317823946476, 'timestamp': '2025-09-30 23:05:29.007132', 'step': 2928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.079344', 'step': 2928, 'epoch': 2} {'type': 'loss', 'content': 0.05340416356921196, 'timestamp': '2025-09-30 23:05:29.085572', 'step': 2929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.152087', 'step': 2929, 'epoch': 2} {'type': 'loss', 'content': 0.023602964356541634, 'timestamp': '2025-09-30 23:05:29.168954', 'step': 2930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.271447', 'step': 2930, 'epoch': 2} {'type': 'loss', 'content': 0.017615508288145065, 'timestamp': '2025-09-30 23:05:29.278395', 'step': 2931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.355222', 'step': 2931, 'epoch': 2} {'type': 'loss', 'content': 0.009241404011845589, 'timestamp': '2025-09-30 23:05:29.379940', 'step': 2932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:29.503957', 'step': 2932, 'epoch': 2} {'type': 'loss', 'content': 0.021032793447375298, 'timestamp': '2025-09-30 23:05:29.526083', 'step': 2933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.632528', 'step': 2933, 'epoch': 2} {'type': 'loss', 'content': 0.02118886448442936, 'timestamp': '2025-09-30 23:05:29.636211', 'step': 2934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.711413', 'step': 2934, 'epoch': 2} {'type': 'loss', 'content': 0.013830102980136871, 'timestamp': '2025-09-30 23:05:29.723348', 'step': 2935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.798794', 'step': 2935, 'epoch': 2} {'type': 'loss', 'content': 0.006135399453341961, 'timestamp': '2025-09-30 23:05:29.816151', 'step': 2936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.892078', 'step': 2936, 'epoch': 2} {'type': 'loss', 'content': 0.005109884310513735, 'timestamp': '2025-09-30 23:05:29.905299', 'step': 2937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:29.987865', 'step': 2937, 'epoch': 2} {'type': 'loss', 'content': 0.013165413402020931, 'timestamp': '2025-09-30 23:05:29.993842', 'step': 2938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:30.056993', 'step': 2938, 'epoch': 2} {'type': 'loss', 'content': 0.06098811700940132, 'timestamp': '2025-09-30 23:05:30.062331', 'step': 2939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:30.126191', 'step': 2939, 'epoch': 2} {'type': 'loss', 'content': 0.019830357283353806, 'timestamp': '2025-09-30 23:05:30.134374', 'step': 2940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:30.193192', 'step': 2940, 'epoch': 2} {'type': 'loss', 'content': 0.028469601646065712, 'timestamp': '2025-09-30 23:05:30.195771', 'step': 2941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:30.257121', 'step': 2941, 'epoch': 2} {'type': 'loss', 'content': 0.02163900062441826, 'timestamp': '2025-09-30 23:05:30.262875', 'step': 2942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:30.326978', 'step': 2942, 'epoch': 2} {'type': 'loss', 'content': 0.021107107400894165, 'timestamp': '2025-09-30 23:05:30.329683', 'step': 2943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:30.390580', 'step': 2943, 'epoch': 2} {'type': 'loss', 'content': 0.01834254525601864, 'timestamp': '2025-09-30 23:05:30.398658', 'step': 2944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:30.466586', 'step': 2944, 'epoch': 2} {'type': 'loss', 'content': 0.010087335482239723, 'timestamp': '2025-09-30 23:05:30.469862', 'step': 2945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:30.540494', 'step': 2945, 'epoch': 2} {'type': 'loss', 'content': 0.01358881313353777, 'timestamp': '2025-09-30 23:05:30.551393', 'step': 2946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:30.627883', 'step': 2946, 'epoch': 2} {'type': 'loss', 'content': 0.016866762191057205, 'timestamp': '2025-09-30 23:05:30.635328', 'step': 2947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:30.723389', 'step': 2947, 'epoch': 2} {'type': 'loss', 'content': 0.02342984266579151, 'timestamp': '2025-09-30 23:05:30.746239', 'step': 2948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:30.844900', 'step': 2948, 'epoch': 2} {'type': 'loss', 'content': 0.030765479430556297, 'timestamp': '2025-09-30 23:05:30.849872', 'step': 2949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:30.958145', 'step': 2949, 'epoch': 2} {'type': 'loss', 'content': 0.04516691714525223, 'timestamp': '2025-09-30 23:05:30.967691', 'step': 2950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.049446', 'step': 2950, 'epoch': 2} {'type': 'loss', 'content': 0.011260425671935081, 'timestamp': '2025-09-30 23:05:31.063356', 'step': 2951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.132320', 'step': 2951, 'epoch': 2} {'type': 'loss', 'content': 0.034232694655656815, 'timestamp': '2025-09-30 23:05:31.147347', 'step': 2952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.224258', 'step': 2952, 'epoch': 2} {'type': 'loss', 'content': 0.0030731488950550556, 'timestamp': '2025-09-30 23:05:31.231976', 'step': 2953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.303377', 'step': 2953, 'epoch': 2} {'type': 'loss', 'content': 0.02917398139834404, 'timestamp': '2025-09-30 23:05:31.311632', 'step': 2954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.379016', 'step': 2954, 'epoch': 2} {'type': 'loss', 'content': 0.014205610379576683, 'timestamp': '2025-09-30 23:05:31.391958', 'step': 2955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.453760', 'step': 2955, 'epoch': 2} {'type': 'loss', 'content': 0.01730748452246189, 'timestamp': '2025-09-30 23:05:31.465364', 'step': 2956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:31.535034', 'step': 2956, 'epoch': 2} {'type': 'loss', 'content': 0.004983804654330015, 'timestamp': '2025-09-30 23:05:31.539909', 'step': 2957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.602667', 'step': 2957, 'epoch': 2} {'type': 'loss', 'content': 0.006316361483186483, 'timestamp': '2025-09-30 23:05:31.611528', 'step': 2958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.696308', 'step': 2958, 'epoch': 2} {'type': 'loss', 'content': 0.04499377682805061, 'timestamp': '2025-09-30 23:05:31.711841', 'step': 2959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.786133', 'step': 2959, 'epoch': 2} {'type': 'loss', 'content': 0.007001515477895737, 'timestamp': '2025-09-30 23:05:31.802095', 'step': 2960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.871321', 'step': 2960, 'epoch': 2} {'type': 'loss', 'content': 0.025294626131653786, 'timestamp': '2025-09-30 23:05:31.874994', 'step': 2961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:31.942816', 'step': 2961, 'epoch': 2} {'type': 'loss', 'content': 0.014891204424202442, 'timestamp': '2025-09-30 23:05:31.945597', 'step': 2962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.003137', 'step': 2962, 'epoch': 2} {'type': 'loss', 'content': 0.02651948668062687, 'timestamp': '2025-09-30 23:05:32.006905', 'step': 2963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.065630', 'step': 2963, 'epoch': 2} {'type': 'loss', 'content': 0.012213872745633125, 'timestamp': '2025-09-30 23:05:32.074029', 'step': 2964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.131877', 'step': 2964, 'epoch': 2} {'type': 'loss', 'content': 0.05758679658174515, 'timestamp': '2025-09-30 23:05:32.135555', 'step': 2965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:32.195359', 'step': 2965, 'epoch': 2} {'type': 'loss', 'content': 0.03389124199748039, 'timestamp': '2025-09-30 23:05:32.198620', 'step': 2966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:32.257273', 'step': 2966, 'epoch': 2} {'type': 'loss', 'content': 0.01986263319849968, 'timestamp': '2025-09-30 23:05:32.260300', 'step': 2967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.317554', 'step': 2967, 'epoch': 2} {'type': 'loss', 'content': 0.04568326845765114, 'timestamp': '2025-09-30 23:05:32.324726', 'step': 2968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:32.381405', 'step': 2968, 'epoch': 2} {'type': 'loss', 'content': 0.0024693883024156094, 'timestamp': '2025-09-30 23:05:32.384221', 'step': 2969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.439568', 'step': 2969, 'epoch': 2} {'type': 'loss', 'content': 0.020492201671004295, 'timestamp': '2025-09-30 23:05:32.442701', 'step': 2970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.499815', 'step': 2970, 'epoch': 2} {'type': 'loss', 'content': 0.00908744428306818, 'timestamp': '2025-09-30 23:05:32.502884', 'step': 2971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.565101', 'step': 2971, 'epoch': 2} {'type': 'loss', 'content': 0.00759161775931716, 'timestamp': '2025-09-30 23:05:32.576376', 'step': 2972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.647807', 'step': 2972, 'epoch': 2} {'type': 'loss', 'content': 0.0006053223041817546, 'timestamp': '2025-09-30 23:05:32.652483', 'step': 2973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.721364', 'step': 2973, 'epoch': 2} {'type': 'loss', 'content': 0.00908237136900425, 'timestamp': '2025-09-30 23:05:32.726735', 'step': 2974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.789940', 'step': 2974, 'epoch': 2} {'type': 'loss', 'content': 0.00307060731574893, 'timestamp': '2025-09-30 23:05:32.793438', 'step': 2975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.852231', 'step': 2975, 'epoch': 2} {'type': 'loss', 'content': 0.0036478987894952297, 'timestamp': '2025-09-30 23:05:32.862480', 'step': 2976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:32.919582', 'step': 2976, 'epoch': 2} {'type': 'loss', 'content': 0.007946545258164406, 'timestamp': '2025-09-30 23:05:32.922664', 'step': 2977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:32.977489', 'step': 2977, 'epoch': 2} {'type': 'loss', 'content': 0.07434551417827606, 'timestamp': '2025-09-30 23:05:32.980704', 'step': 2978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.035425', 'step': 2978, 'epoch': 2} {'type': 'loss', 'content': 0.005639293231070042, 'timestamp': '2025-09-30 23:05:33.040888', 'step': 2979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.103512', 'step': 2979, 'epoch': 2} {'type': 'loss', 'content': 0.005824704188853502, 'timestamp': '2025-09-30 23:05:33.113803', 'step': 2980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.176314', 'step': 2980, 'epoch': 2} {'type': 'loss', 'content': 0.013461491093039513, 'timestamp': '2025-09-30 23:05:33.184484', 'step': 2981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.242377', 'step': 2981, 'epoch': 2} {'type': 'loss', 'content': 0.012743769213557243, 'timestamp': '2025-09-30 23:05:33.245419', 'step': 2982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.301269', 'step': 2982, 'epoch': 2} {'type': 'loss', 'content': 0.03013800084590912, 'timestamp': '2025-09-30 23:05:33.304417', 'step': 2983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.360160', 'step': 2983, 'epoch': 2} {'type': 'loss', 'content': 0.003911891952157021, 'timestamp': '2025-09-30 23:05:33.367254', 'step': 2984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:33.421520', 'step': 2984, 'epoch': 2} {'type': 'loss', 'content': 0.005459845066070557, 'timestamp': '2025-09-30 23:05:33.425313', 'step': 2985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.486908', 'step': 2985, 'epoch': 2} {'type': 'loss', 'content': 0.025441233068704605, 'timestamp': '2025-09-30 23:05:33.490215', 'step': 2986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.544286', 'step': 2986, 'epoch': 2} {'type': 'loss', 'content': 0.005099098198115826, 'timestamp': '2025-09-30 23:05:33.548000', 'step': 2987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.602187', 'step': 2987, 'epoch': 2} {'type': 'loss', 'content': 0.026611968874931335, 'timestamp': '2025-09-30 23:05:33.615772', 'step': 2988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.668989', 'step': 2988, 'epoch': 2} {'type': 'loss', 'content': 0.007369569502770901, 'timestamp': '2025-09-30 23:05:33.672956', 'step': 2989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.732874', 'step': 2989, 'epoch': 2} {'type': 'loss', 'content': 0.03735092654824257, 'timestamp': '2025-09-30 23:05:33.737476', 'step': 2990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.793804', 'step': 2990, 'epoch': 2} {'type': 'loss', 'content': 0.024885132908821106, 'timestamp': '2025-09-30 23:05:33.796898', 'step': 2991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.852604', 'step': 2991, 'epoch': 2} {'type': 'loss', 'content': 0.020777851343154907, 'timestamp': '2025-09-30 23:05:33.864807', 'step': 2992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:33.930289', 'step': 2992, 'epoch': 2} {'type': 'loss', 'content': 0.009329400025308132, 'timestamp': '2025-09-30 23:05:33.937854', 'step': 2993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:34.006813', 'step': 2993, 'epoch': 2} {'type': 'loss', 'content': 0.010109909810125828, 'timestamp': '2025-09-30 23:05:34.010907', 'step': 2994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:34.066969', 'step': 2994, 'epoch': 2} {'type': 'loss', 'content': 0.0027911902870982885, 'timestamp': '2025-09-30 23:05:34.071000', 'step': 2995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:34.128617', 'step': 2995, 'epoch': 2} {'type': 'loss', 'content': 0.010242600925266743, 'timestamp': '2025-09-30 23:05:34.135584', 'step': 2996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:34.193110', 'step': 2996, 'epoch': 2} {'type': 'loss', 'content': 0.023415153846144676, 'timestamp': '2025-09-30 23:05:34.196102', 'step': 2997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:34.259080', 'step': 2997, 'epoch': 2} {'type': 'loss', 'content': 0.05155562236905098, 'timestamp': '2025-09-30 23:05:34.261967', 'step': 2998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:34.317207', 'step': 2998, 'epoch': 2} {'type': 'loss', 'content': 0.06545253843069077, 'timestamp': '2025-09-30 23:05:34.319830', 'step': 2999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:34.375694', 'step': 2999, 'epoch': 2} {'type': 'loss', 'content': 0.06804903596639633, 'timestamp': '2025-09-30 23:05:34.382362', 'step': 3000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-30 23:05:34.792358', 'step': 3000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:34.852322', 'step': 3000, 'epoch': 2} {'type': 'loss', 'content': 0.01065895240753889, 'timestamp': '2025-09-30 23:05:34.861050', 'step': 3001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:34.937067', 'step': 3001, 'epoch': 2} {'type': 'loss', 'content': 0.04352938011288643, 'timestamp': '2025-09-30 23:05:34.940999', 'step': 3002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.010025', 'step': 3002, 'epoch': 2} {'type': 'loss', 'content': 0.028224527835845947, 'timestamp': '2025-09-30 23:05:35.020099', 'step': 3003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.085213', 'step': 3003, 'epoch': 2} {'type': 'loss', 'content': 0.004245622083544731, 'timestamp': '2025-09-30 23:05:35.092949', 'step': 3004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:35.156500', 'step': 3004, 'epoch': 2} {'type': 'loss', 'content': 0.0014710044488310814, 'timestamp': '2025-09-30 23:05:35.161409', 'step': 3005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.219793', 'step': 3005, 'epoch': 2} {'type': 'loss', 'content': 0.056311603635549545, 'timestamp': '2025-09-30 23:05:35.223376', 'step': 3006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.282836', 'step': 3006, 'epoch': 2} {'type': 'loss', 'content': 0.02040809765458107, 'timestamp': '2025-09-30 23:05:35.286009', 'step': 3007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.341110', 'step': 3007, 'epoch': 2} {'type': 'loss', 'content': 0.016531098634004593, 'timestamp': '2025-09-30 23:05:35.348535', 'step': 3008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.402955', 'step': 3008, 'epoch': 2} {'type': 'loss', 'content': 0.0720641240477562, 'timestamp': '2025-09-30 23:05:35.406979', 'step': 3009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.462246', 'step': 3009, 'epoch': 2} {'type': 'loss', 'content': 0.03155646473169327, 'timestamp': '2025-09-30 23:05:35.465824', 'step': 3010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.538523', 'step': 3010, 'epoch': 2} {'type': 'loss', 'content': 0.00818681251257658, 'timestamp': '2025-09-30 23:05:35.548022', 'step': 3011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.607223', 'step': 3011, 'epoch': 2} {'type': 'loss', 'content': 0.011452289298176765, 'timestamp': '2025-09-30 23:05:35.615563', 'step': 3012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.678947', 'step': 3012, 'epoch': 2} {'type': 'loss', 'content': 0.015635674819350243, 'timestamp': '2025-09-30 23:05:35.684075', 'step': 3013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.755759', 'step': 3013, 'epoch': 2} {'type': 'loss', 'content': 0.021075263619422913, 'timestamp': '2025-09-30 23:05:35.765327', 'step': 3014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.835586', 'step': 3014, 'epoch': 2} {'type': 'loss', 'content': 0.002126485574990511, 'timestamp': '2025-09-30 23:05:35.843537', 'step': 3015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:35.906666', 'step': 3015, 'epoch': 2} {'type': 'loss', 'content': 0.026002509519457817, 'timestamp': '2025-09-30 23:05:35.914709', 'step': 3016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:35.979619', 'step': 3016, 'epoch': 2} {'type': 'loss', 'content': 0.004538468085229397, 'timestamp': '2025-09-30 23:05:35.987489', 'step': 3017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.052481', 'step': 3017, 'epoch': 2} {'type': 'loss', 'content': 0.054896268993616104, 'timestamp': '2025-09-30 23:05:36.060711', 'step': 3018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.119719', 'step': 3018, 'epoch': 2} {'type': 'loss', 'content': 0.01183659490197897, 'timestamp': '2025-09-30 23:05:36.124348', 'step': 3019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.188223', 'step': 3019, 'epoch': 2} {'type': 'loss', 'content': 0.009769381955265999, 'timestamp': '2025-09-30 23:05:36.196044', 'step': 3020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:36.255639', 'step': 3020, 'epoch': 2} {'type': 'loss', 'content': 0.006870661396533251, 'timestamp': '2025-09-30 23:05:36.259763', 'step': 3021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.320510', 'step': 3021, 'epoch': 2} {'type': 'loss', 'content': 0.011392041109502316, 'timestamp': '2025-09-30 23:05:36.325197', 'step': 3022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.386490', 'step': 3022, 'epoch': 2} {'type': 'loss', 'content': 0.026720697060227394, 'timestamp': '2025-09-30 23:05:36.392259', 'step': 3023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.452887', 'step': 3023, 'epoch': 2} {'type': 'loss', 'content': 0.023440107703208923, 'timestamp': '2025-09-30 23:05:36.465097', 'step': 3024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:05:36.545169', 'step': 3024, 'epoch': 2} {'type': 'loss', 'content': 0.028765974566340446, 'timestamp': '2025-09-30 23:05:36.553409', 'step': 3025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.626200', 'step': 3025, 'epoch': 2} {'type': 'loss', 'content': 0.0088735306635499, 'timestamp': '2025-09-30 23:05:36.631553', 'step': 3026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.697399', 'step': 3026, 'epoch': 2} {'type': 'loss', 'content': 0.004252955783158541, 'timestamp': '2025-09-30 23:05:36.702275', 'step': 3027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.765780', 'step': 3027, 'epoch': 2} {'type': 'loss', 'content': 0.029675420373678207, 'timestamp': '2025-09-30 23:05:36.772406', 'step': 3028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.842125', 'step': 3028, 'epoch': 2} {'type': 'loss', 'content': 0.036009080708026886, 'timestamp': '2025-09-30 23:05:36.850496', 'step': 3029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.909899', 'step': 3029, 'epoch': 2} {'type': 'loss', 'content': 0.010113522410392761, 'timestamp': '2025-09-30 23:05:36.915505', 'step': 3030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:36.977882', 'step': 3030, 'epoch': 2} {'type': 'loss', 'content': 0.02721237763762474, 'timestamp': '2025-09-30 23:05:36.983718', 'step': 3031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:37.054751', 'step': 3031, 'epoch': 2} {'type': 'loss', 'content': 0.002630955306813121, 'timestamp': '2025-09-30 23:05:37.063975', 'step': 3032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:37.124487', 'step': 3032, 'epoch': 2} {'type': 'loss', 'content': 0.02598247490823269, 'timestamp': '2025-09-30 23:05:37.127382', 'step': 3033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:37.190602', 'step': 3033, 'epoch': 2} {'type': 'loss', 'content': 0.011081978678703308, 'timestamp': '2025-09-30 23:05:37.197513', 'step': 3034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:37.260370', 'step': 3034, 'epoch': 2} {'type': 'loss', 'content': 0.0010622665286064148, 'timestamp': '2025-09-30 23:05:37.265447', 'step': 3035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:37.327594', 'step': 3035, 'epoch': 2} {'type': 'loss', 'content': 0.05690154433250427, 'timestamp': '2025-09-30 23:05:37.336649', 'step': 3036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:37.397270', 'step': 3036, 'epoch': 2} {'type': 'loss', 'content': 0.04230496659874916, 'timestamp': '2025-09-30 23:05:37.404544', 'step': 3037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:37.466699', 'step': 3037, 'epoch': 2} {'type': 'loss', 'content': 0.03770161420106888, 'timestamp': '2025-09-30 23:05:37.480520', 'step': 3038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:37.566440', 'step': 3038, 'epoch': 2} {'type': 'loss', 'content': 0.004079726524651051, 'timestamp': '2025-09-30 23:05:37.576186', 'step': 3039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:37.660669', 'step': 3039, 'epoch': 2} {'type': 'loss', 'content': 0.017340848222374916, 'timestamp': '2025-09-30 23:05:37.676036', 'step': 3040, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:05:42.528736', 'step': 3040, 'epoch': 2} {'type': 'pplx', 'content': 6834669.334838629, 'timestamp': '2025-09-30 23:05:42.534485', 'step': 3040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:42.592832', 'step': 3040, 'epoch': 2} {'type': 'loss', 'content': 0.02380545437335968, 'timestamp': '2025-09-30 23:05:42.599301', 'step': 3041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:42.665124', 'step': 3041, 'epoch': 2} {'type': 'loss', 'content': 0.021835695952177048, 'timestamp': '2025-09-30 23:05:42.670197', 'step': 3042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:42.738287', 'step': 3042, 'epoch': 2} {'type': 'loss', 'content': 0.0011707906378433108, 'timestamp': '2025-09-30 23:05:42.743039', 'step': 3043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:42.804299', 'step': 3043, 'epoch': 2} {'type': 'loss', 'content': 0.022792693227529526, 'timestamp': '2025-09-30 23:05:42.812529', 'step': 3044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:42.874070', 'step': 3044, 'epoch': 2} {'type': 'loss', 'content': 0.05219408497214317, 'timestamp': '2025-09-30 23:05:42.882858', 'step': 3045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:42.971471', 'step': 3045, 'epoch': 2} {'type': 'loss', 'content': 0.0232208464294672, 'timestamp': '2025-09-30 23:05:42.981415', 'step': 3046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.067724', 'step': 3046, 'epoch': 2} {'type': 'loss', 'content': 0.014135838486254215, 'timestamp': '2025-09-30 23:05:43.077350', 'step': 3047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.150125', 'step': 3047, 'epoch': 2} {'type': 'loss', 'content': 0.007330916356295347, 'timestamp': '2025-09-30 23:05:43.160832', 'step': 3048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.229702', 'step': 3048, 'epoch': 2} {'type': 'loss', 'content': 0.023887965828180313, 'timestamp': '2025-09-30 23:05:43.233905', 'step': 3049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.293768', 'step': 3049, 'epoch': 2} {'type': 'loss', 'content': 0.033102426677942276, 'timestamp': '2025-09-30 23:05:43.299749', 'step': 3050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.361823', 'step': 3050, 'epoch': 2} {'type': 'loss', 'content': 0.009587721899151802, 'timestamp': '2025-09-30 23:05:43.367257', 'step': 3051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.432278', 'step': 3051, 'epoch': 2} {'type': 'loss', 'content': 0.003975891508162022, 'timestamp': '2025-09-30 23:05:43.442896', 'step': 3052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.506729', 'step': 3052, 'epoch': 2} {'type': 'loss', 'content': 0.04640847072005272, 'timestamp': '2025-09-30 23:05:43.509917', 'step': 3053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.581656', 'step': 3053, 'epoch': 2} {'type': 'loss', 'content': 0.017967838793992996, 'timestamp': '2025-09-30 23:05:43.595674', 'step': 3054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.686215', 'step': 3054, 'epoch': 2} {'type': 'loss', 'content': 0.015846261754631996, 'timestamp': '2025-09-30 23:05:43.697473', 'step': 3055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:43.786824', 'step': 3055, 'epoch': 2} {'type': 'loss', 'content': 0.008151931688189507, 'timestamp': '2025-09-30 23:05:43.796480', 'step': 3056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:43.860179', 'step': 3056, 'epoch': 2} {'type': 'loss', 'content': 0.0031493324786424637, 'timestamp': '2025-09-30 23:05:43.867367', 'step': 3057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:43.937830', 'step': 3057, 'epoch': 2} {'type': 'loss', 'content': 0.023284194990992546, 'timestamp': '2025-09-30 23:05:43.947963', 'step': 3058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:44.020523', 'step': 3058, 'epoch': 2} {'type': 'loss', 'content': 0.034065309911966324, 'timestamp': '2025-09-30 23:05:44.029302', 'step': 3059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.102299', 'step': 3059, 'epoch': 2} {'type': 'loss', 'content': 0.05246307700872421, 'timestamp': '2025-09-30 23:05:44.109492', 'step': 3060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.169479', 'step': 3060, 'epoch': 2} {'type': 'loss', 'content': 0.031259745359420776, 'timestamp': '2025-09-30 23:05:44.177427', 'step': 3061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.246466', 'step': 3061, 'epoch': 2} {'type': 'loss', 'content': 0.022127270698547363, 'timestamp': '2025-09-30 23:05:44.255598', 'step': 3062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.327995', 'step': 3062, 'epoch': 2} {'type': 'loss', 'content': 0.006738060619682074, 'timestamp': '2025-09-30 23:05:44.337428', 'step': 3063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.410478', 'step': 3063, 'epoch': 2} {'type': 'loss', 'content': 0.0062853312119841576, 'timestamp': '2025-09-30 23:05:44.421545', 'step': 3064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.486323', 'step': 3064, 'epoch': 2} {'type': 'loss', 'content': 0.0063713314011693, 'timestamp': '2025-09-30 23:05:44.491942', 'step': 3065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.559526', 'step': 3065, 'epoch': 2} {'type': 'loss', 'content': 0.030077366158366203, 'timestamp': '2025-09-30 23:05:44.564127', 'step': 3066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.634045', 'step': 3066, 'epoch': 2} {'type': 'loss', 'content': 0.02513028308749199, 'timestamp': '2025-09-30 23:05:44.643036', 'step': 3067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.716546', 'step': 3067, 'epoch': 2} {'type': 'loss', 'content': 0.03766511008143425, 'timestamp': '2025-09-30 23:05:44.727364', 'step': 3068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.800566', 'step': 3068, 'epoch': 2} {'type': 'loss', 'content': 0.0018595476867631078, 'timestamp': '2025-09-30 23:05:44.803173', 'step': 3069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.870240', 'step': 3069, 'epoch': 2} {'type': 'loss', 'content': 0.02282741479575634, 'timestamp': '2025-09-30 23:05:44.878239', 'step': 3070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:44.945091', 'step': 3070, 'epoch': 2} {'type': 'loss', 'content': 0.03191061317920685, 'timestamp': '2025-09-30 23:05:44.955312', 'step': 3071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.027309', 'step': 3071, 'epoch': 2} {'type': 'loss', 'content': 0.02246817946434021, 'timestamp': '2025-09-30 23:05:45.040046', 'step': 3072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.120400', 'step': 3072, 'epoch': 2} {'type': 'loss', 'content': 0.0031576398760080338, 'timestamp': '2025-09-30 23:05:45.131948', 'step': 3073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.212206', 'step': 3073, 'epoch': 2} {'type': 'loss', 'content': 0.07550687342882156, 'timestamp': '2025-09-30 23:05:45.216606', 'step': 3074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:45.301308', 'step': 3074, 'epoch': 2} {'type': 'loss', 'content': 0.014949892647564411, 'timestamp': '2025-09-30 23:05:45.304872', 'step': 3075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.372014', 'step': 3075, 'epoch': 2} {'type': 'loss', 'content': 0.014469780959188938, 'timestamp': '2025-09-30 23:05:45.383216', 'step': 3076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.451192', 'step': 3076, 'epoch': 2} {'type': 'loss', 'content': 0.010722544975578785, 'timestamp': '2025-09-30 23:05:45.454071', 'step': 3077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.521019', 'step': 3077, 'epoch': 2} {'type': 'loss', 'content': 0.015758831053972244, 'timestamp': '2025-09-30 23:05:45.525032', 'step': 3078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.605988', 'step': 3078, 'epoch': 2} {'type': 'loss', 'content': 0.022281724959611893, 'timestamp': '2025-09-30 23:05:45.608974', 'step': 3079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.682023', 'step': 3079, 'epoch': 2} {'type': 'loss', 'content': 0.014169248752295971, 'timestamp': '2025-09-30 23:05:45.698148', 'step': 3080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.773848', 'step': 3080, 'epoch': 2} {'type': 'loss', 'content': 0.0387117974460125, 'timestamp': '2025-09-30 23:05:45.779640', 'step': 3081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:45.847083', 'step': 3081, 'epoch': 2} {'type': 'loss', 'content': 0.029980897903442383, 'timestamp': '2025-09-30 23:05:45.855143', 'step': 3082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:45.926046', 'step': 3082, 'epoch': 2} {'type': 'loss', 'content': 0.025191528722643852, 'timestamp': '2025-09-30 23:05:45.937855', 'step': 3083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.014081', 'step': 3083, 'epoch': 2} {'type': 'loss', 'content': 0.033525947481393814, 'timestamp': '2025-09-30 23:05:46.026598', 'step': 3084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:46.101372', 'step': 3084, 'epoch': 2} {'type': 'loss', 'content': 0.01364533044397831, 'timestamp': '2025-09-30 23:05:46.106709', 'step': 3085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.173717', 'step': 3085, 'epoch': 2} {'type': 'loss', 'content': 0.022073868662118912, 'timestamp': '2025-09-30 23:05:46.176519', 'step': 3086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.233681', 'step': 3086, 'epoch': 2} {'type': 'loss', 'content': 0.0029270045924931765, 'timestamp': '2025-09-30 23:05:46.239036', 'step': 3087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:46.304880', 'step': 3087, 'epoch': 2} {'type': 'loss', 'content': 0.007999125868082047, 'timestamp': '2025-09-30 23:05:46.313304', 'step': 3088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.373620', 'step': 3088, 'epoch': 2} {'type': 'loss', 'content': 0.008323193527758121, 'timestamp': '2025-09-30 23:05:46.380305', 'step': 3089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.443165', 'step': 3089, 'epoch': 2} {'type': 'loss', 'content': 0.021273095160722733, 'timestamp': '2025-09-30 23:05:46.457612', 'step': 3090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.523942', 'step': 3090, 'epoch': 2} {'type': 'loss', 'content': 0.010587443597614765, 'timestamp': '2025-09-30 23:05:46.534150', 'step': 3091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.613730', 'step': 3091, 'epoch': 2} {'type': 'loss', 'content': 0.030795305967330933, 'timestamp': '2025-09-30 23:05:46.620861', 'step': 3092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.685905', 'step': 3092, 'epoch': 2} {'type': 'loss', 'content': 0.032362181693315506, 'timestamp': '2025-09-30 23:05:46.692717', 'step': 3093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:46.760673', 'step': 3093, 'epoch': 2} {'type': 'loss', 'content': 0.023158472031354904, 'timestamp': '2025-09-30 23:05:46.767804', 'step': 3094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.838622', 'step': 3094, 'epoch': 2} {'type': 'loss', 'content': 0.01693158969283104, 'timestamp': '2025-09-30 23:05:46.842267', 'step': 3095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.900695', 'step': 3095, 'epoch': 2} {'type': 'loss', 'content': 0.021069347858428955, 'timestamp': '2025-09-30 23:05:46.911232', 'step': 3096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:46.984781', 'step': 3096, 'epoch': 2} {'type': 'loss', 'content': 0.029919445514678955, 'timestamp': '2025-09-30 23:05:46.995884', 'step': 3097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.073339', 'step': 3097, 'epoch': 2} {'type': 'loss', 'content': 0.029690396040678024, 'timestamp': '2025-09-30 23:05:47.080980', 'step': 3098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.151099', 'step': 3098, 'epoch': 2} {'type': 'loss', 'content': 0.018051637336611748, 'timestamp': '2025-09-30 23:05:47.154598', 'step': 3099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.215003', 'step': 3099, 'epoch': 2} {'type': 'loss', 'content': 0.004005540627986193, 'timestamp': '2025-09-30 23:05:47.222171', 'step': 3100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.278194', 'step': 3100, 'epoch': 2} {'type': 'loss', 'content': 0.006286539603024721, 'timestamp': '2025-09-30 23:05:47.282321', 'step': 3101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:47.354119', 'step': 3101, 'epoch': 2} {'type': 'loss', 'content': 0.005222097504884005, 'timestamp': '2025-09-30 23:05:47.357578', 'step': 3102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.428435', 'step': 3102, 'epoch': 2} {'type': 'loss', 'content': 0.04089021682739258, 'timestamp': '2025-09-30 23:05:47.432193', 'step': 3103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:47.489495', 'step': 3103, 'epoch': 2} {'type': 'loss', 'content': 0.012418181635439396, 'timestamp': '2025-09-30 23:05:47.498577', 'step': 3104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.554237', 'step': 3104, 'epoch': 2} {'type': 'loss', 'content': 0.021858353167772293, 'timestamp': '2025-09-30 23:05:47.556318', 'step': 3105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.622066', 'step': 3105, 'epoch': 2} {'type': 'loss', 'content': 0.015437078662216663, 'timestamp': '2025-09-30 23:05:47.629550', 'step': 3106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.715190', 'step': 3106, 'epoch': 2} {'type': 'loss', 'content': 0.0016458496684208512, 'timestamp': '2025-09-30 23:05:47.719403', 'step': 3107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.797723', 'step': 3107, 'epoch': 2} {'type': 'loss', 'content': 0.0016837787115946412, 'timestamp': '2025-09-30 23:05:47.811809', 'step': 3108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.894128', 'step': 3108, 'epoch': 2} {'type': 'loss', 'content': 0.013686326332390308, 'timestamp': '2025-09-30 23:05:47.898189', 'step': 3109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:47.986423', 'step': 3109, 'epoch': 2} {'type': 'loss', 'content': 0.051652584224939346, 'timestamp': '2025-09-30 23:05:47.995432', 'step': 3110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.074955', 'step': 3110, 'epoch': 2} {'type': 'loss', 'content': 0.0009345960570499301, 'timestamp': '2025-09-30 23:05:48.086341', 'step': 3111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:48.160835', 'step': 3111, 'epoch': 2} {'type': 'loss', 'content': 0.014455020427703857, 'timestamp': '2025-09-30 23:05:48.173804', 'step': 3112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.246170', 'step': 3112, 'epoch': 2} {'type': 'loss', 'content': 0.029842155054211617, 'timestamp': '2025-09-30 23:05:48.252387', 'step': 3113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.319842', 'step': 3113, 'epoch': 2} {'type': 'loss', 'content': 0.017067434266209602, 'timestamp': '2025-09-30 23:05:48.325484', 'step': 3114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.392050', 'step': 3114, 'epoch': 2} {'type': 'loss', 'content': 0.027880912646651268, 'timestamp': '2025-09-30 23:05:48.400986', 'step': 3115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.473419', 'step': 3115, 'epoch': 2} {'type': 'loss', 'content': 0.014249714091420174, 'timestamp': '2025-09-30 23:05:48.487655', 'step': 3116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.566938', 'step': 3116, 'epoch': 2} {'type': 'loss', 'content': 0.003677439410239458, 'timestamp': '2025-09-30 23:05:48.574416', 'step': 3117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.646799', 'step': 3117, 'epoch': 2} {'type': 'loss', 'content': 0.004886984825134277, 'timestamp': '2025-09-30 23:05:48.653226', 'step': 3118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.719433', 'step': 3118, 'epoch': 2} {'type': 'loss', 'content': 0.0030212090350687504, 'timestamp': '2025-09-30 23:05:48.727080', 'step': 3119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.800941', 'step': 3119, 'epoch': 2} {'type': 'loss', 'content': 0.0034950622357428074, 'timestamp': '2025-09-30 23:05:48.808784', 'step': 3120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.871916', 'step': 3120, 'epoch': 2} {'type': 'loss', 'content': 0.004121148027479649, 'timestamp': '2025-09-30 23:05:48.875936', 'step': 3121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:48.936841', 'step': 3121, 'epoch': 2} {'type': 'loss', 'content': 0.02637523226439953, 'timestamp': '2025-09-30 23:05:48.941645', 'step': 3122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.007432', 'step': 3122, 'epoch': 2} {'type': 'loss', 'content': 0.02146524004638195, 'timestamp': '2025-09-30 23:05:49.021588', 'step': 3123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.108244', 'step': 3123, 'epoch': 2} {'type': 'loss', 'content': 0.011523407883942127, 'timestamp': '2025-09-30 23:05:49.124972', 'step': 3124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.183304', 'step': 3124, 'epoch': 2} {'type': 'loss', 'content': 0.009539658203721046, 'timestamp': '2025-09-30 23:05:49.187591', 'step': 3125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.247492', 'step': 3125, 'epoch': 2} {'type': 'loss', 'content': 0.015585281886160374, 'timestamp': '2025-09-30 23:05:49.257058', 'step': 3126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.325198', 'step': 3126, 'epoch': 2} {'type': 'loss', 'content': 0.05425044521689415, 'timestamp': '2025-09-30 23:05:49.333946', 'step': 3127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.393718', 'step': 3127, 'epoch': 2} {'type': 'loss', 'content': 0.07365263253450394, 'timestamp': '2025-09-30 23:05:49.403953', 'step': 3128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.468666', 'step': 3128, 'epoch': 2} {'type': 'loss', 'content': 0.013111332431435585, 'timestamp': '2025-09-30 23:05:49.475510', 'step': 3129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:49.539390', 'step': 3129, 'epoch': 2} {'type': 'loss', 'content': 0.009978731162846088, 'timestamp': '2025-09-30 23:05:49.546310', 'step': 3130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.614150', 'step': 3130, 'epoch': 2} {'type': 'loss', 'content': 0.004901269916445017, 'timestamp': '2025-09-30 23:05:49.616554', 'step': 3131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.678242', 'step': 3131, 'epoch': 2} {'type': 'loss', 'content': 0.007782581727951765, 'timestamp': '2025-09-30 23:05:49.689627', 'step': 3132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.754008', 'step': 3132, 'epoch': 2} {'type': 'loss', 'content': 0.003857517847791314, 'timestamp': '2025-09-30 23:05:49.763449', 'step': 3133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.825598', 'step': 3133, 'epoch': 2} {'type': 'loss', 'content': 0.008736095391213894, 'timestamp': '2025-09-30 23:05:49.830175', 'step': 3134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.891189', 'step': 3134, 'epoch': 2} {'type': 'loss', 'content': 0.018602851778268814, 'timestamp': '2025-09-30 23:05:49.895160', 'step': 3135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:49.956039', 'step': 3135, 'epoch': 2} {'type': 'loss', 'content': 0.0071113319136202335, 'timestamp': '2025-09-30 23:05:49.965345', 'step': 3136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.028039', 'step': 3136, 'epoch': 2} {'type': 'loss', 'content': 0.006508105434477329, 'timestamp': '2025-09-30 23:05:50.032669', 'step': 3137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:50.093077', 'step': 3137, 'epoch': 2} {'type': 'loss', 'content': 0.0471738800406456, 'timestamp': '2025-09-30 23:05:50.101233', 'step': 3138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:50.175418', 'step': 3138, 'epoch': 2} {'type': 'loss', 'content': 0.007766250986605883, 'timestamp': '2025-09-30 23:05:50.182317', 'step': 3139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:50.259917', 'step': 3139, 'epoch': 2} {'type': 'loss', 'content': 0.015296977944672108, 'timestamp': '2025-09-30 23:05:50.268155', 'step': 3140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.329604', 'step': 3140, 'epoch': 2} {'type': 'loss', 'content': 0.0016050689155235887, 'timestamp': '2025-09-30 23:05:50.338901', 'step': 3141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.404236', 'step': 3141, 'epoch': 2} {'type': 'loss', 'content': 0.036469876766204834, 'timestamp': '2025-09-30 23:05:50.415030', 'step': 3142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.487234', 'step': 3142, 'epoch': 2} {'type': 'loss', 'content': 0.01536401268094778, 'timestamp': '2025-09-30 23:05:50.497592', 'step': 3143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.575779', 'step': 3143, 'epoch': 2} {'type': 'loss', 'content': 0.014295078814029694, 'timestamp': '2025-09-30 23:05:50.585196', 'step': 3144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:50.643385', 'step': 3144, 'epoch': 2} {'type': 'loss', 'content': 0.014632738195359707, 'timestamp': '2025-09-30 23:05:50.647243', 'step': 3145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.705723', 'step': 3145, 'epoch': 2} {'type': 'loss', 'content': 0.012540748342871666, 'timestamp': '2025-09-30 23:05:50.709823', 'step': 3146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.772209', 'step': 3146, 'epoch': 2} {'type': 'loss', 'content': 0.009372703731060028, 'timestamp': '2025-09-30 23:05:50.777345', 'step': 3147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.836067', 'step': 3147, 'epoch': 2} {'type': 'loss', 'content': 0.011748937889933586, 'timestamp': '2025-09-30 23:05:50.844688', 'step': 3148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.908686', 'step': 3148, 'epoch': 2} {'type': 'loss', 'content': 0.017852921038866043, 'timestamp': '2025-09-30 23:05:50.913536', 'step': 3149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:50.982071', 'step': 3149, 'epoch': 2} {'type': 'loss', 'content': 0.023919569328427315, 'timestamp': '2025-09-30 23:05:50.986493', 'step': 3150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:51.045948', 'step': 3150, 'epoch': 2} {'type': 'loss', 'content': 0.0023387835826724768, 'timestamp': '2025-09-30 23:05:51.048204', 'step': 3151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.111561', 'step': 3151, 'epoch': 2} {'type': 'loss', 'content': 0.005567805375903845, 'timestamp': '2025-09-30 23:05:51.119883', 'step': 3152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.186556', 'step': 3152, 'epoch': 2} {'type': 'loss', 'content': 0.02966068685054779, 'timestamp': '2025-09-30 23:05:51.189607', 'step': 3153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.244593', 'step': 3153, 'epoch': 2} {'type': 'loss', 'content': 0.0058517782017588615, 'timestamp': '2025-09-30 23:05:51.247053', 'step': 3154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.302752', 'step': 3154, 'epoch': 2} {'type': 'loss', 'content': 0.0022076708264648914, 'timestamp': '2025-09-30 23:05:51.308068', 'step': 3155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:51.385548', 'step': 3155, 'epoch': 2} {'type': 'loss', 'content': 0.010239413008093834, 'timestamp': '2025-09-30 23:05:51.397003', 'step': 3156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:51.460084', 'step': 3156, 'epoch': 2} {'type': 'loss', 'content': 0.01567167602479458, 'timestamp': '2025-09-30 23:05:51.464224', 'step': 3157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.521923', 'step': 3157, 'epoch': 2} {'type': 'loss', 'content': 0.010767006315290928, 'timestamp': '2025-09-30 23:05:51.525933', 'step': 3158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:51.589954', 'step': 3158, 'epoch': 2} {'type': 'loss', 'content': 0.00782849732786417, 'timestamp': '2025-09-30 23:05:51.594658', 'step': 3159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.653691', 'step': 3159, 'epoch': 2} {'type': 'loss', 'content': 0.005279154982417822, 'timestamp': '2025-09-30 23:05:51.663379', 'step': 3160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.722054', 'step': 3160, 'epoch': 2} {'type': 'loss', 'content': 0.0077755083329975605, 'timestamp': '2025-09-30 23:05:51.724811', 'step': 3161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.784605', 'step': 3161, 'epoch': 2} {'type': 'loss', 'content': 0.003425594652071595, 'timestamp': '2025-09-30 23:05:51.791932', 'step': 3162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.858903', 'step': 3162, 'epoch': 2} {'type': 'loss', 'content': 0.023716673254966736, 'timestamp': '2025-09-30 23:05:51.867739', 'step': 3163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:51.946665', 'step': 3163, 'epoch': 2} {'type': 'loss', 'content': 0.01629246398806572, 'timestamp': '2025-09-30 23:05:51.958766', 'step': 3164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.013983', 'step': 3164, 'epoch': 2} {'type': 'loss', 'content': 0.0480400025844574, 'timestamp': '2025-09-30 23:05:52.016256', 'step': 3165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.090577', 'step': 3165, 'epoch': 2} {'type': 'loss', 'content': 0.010403209365904331, 'timestamp': '2025-09-30 23:05:52.093387', 'step': 3166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.160414', 'step': 3166, 'epoch': 2} {'type': 'loss', 'content': 0.004025950096547604, 'timestamp': '2025-09-30 23:05:52.169633', 'step': 3167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.228940', 'step': 3167, 'epoch': 2} {'type': 'loss', 'content': 0.01237554382532835, 'timestamp': '2025-09-30 23:05:52.235827', 'step': 3168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.296419', 'step': 3168, 'epoch': 2} {'type': 'loss', 'content': 0.027882354333996773, 'timestamp': '2025-09-30 23:05:52.299614', 'step': 3169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.377865', 'step': 3169, 'epoch': 2} {'type': 'loss', 'content': 0.005425737705081701, 'timestamp': '2025-09-30 23:05:52.386631', 'step': 3170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.448994', 'step': 3170, 'epoch': 2} {'type': 'loss', 'content': 0.0072270343080163, 'timestamp': '2025-09-30 23:05:52.457321', 'step': 3171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.514498', 'step': 3171, 'epoch': 2} {'type': 'loss', 'content': 0.016269633546471596, 'timestamp': '2025-09-30 23:05:52.523379', 'step': 3172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.588819', 'step': 3172, 'epoch': 2} {'type': 'loss', 'content': 0.029649551957845688, 'timestamp': '2025-09-30 23:05:52.595092', 'step': 3173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:52.651997', 'step': 3173, 'epoch': 2} {'type': 'loss', 'content': 0.006008023861795664, 'timestamp': '2025-09-30 23:05:52.661111', 'step': 3174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.724272', 'step': 3174, 'epoch': 2} {'type': 'loss', 'content': 0.0026734769344329834, 'timestamp': '2025-09-30 23:05:52.727511', 'step': 3175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.796052', 'step': 3175, 'epoch': 2} {'type': 'loss', 'content': 0.002534782513976097, 'timestamp': '2025-09-30 23:05:52.804475', 'step': 3176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:52.865780', 'step': 3176, 'epoch': 2} {'type': 'loss', 'content': 0.024967217817902565, 'timestamp': '2025-09-30 23:05:52.868779', 'step': 3177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:52.934136', 'step': 3177, 'epoch': 2} {'type': 'loss', 'content': 0.02370208129286766, 'timestamp': '2025-09-30 23:05:52.937495', 'step': 3178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.001071', 'step': 3178, 'epoch': 2} {'type': 'loss', 'content': 0.00019292197248432785, 'timestamp': '2025-09-30 23:05:53.004348', 'step': 3179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.077961', 'step': 3179, 'epoch': 2} {'type': 'loss', 'content': 0.042333733290433884, 'timestamp': '2025-09-30 23:05:53.084350', 'step': 3180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.149827', 'step': 3180, 'epoch': 2} {'type': 'loss', 'content': 0.08766088634729385, 'timestamp': '2025-09-30 23:05:53.167622', 'step': 3181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.229076', 'step': 3181, 'epoch': 2} {'type': 'loss', 'content': 0.0032513632904738188, 'timestamp': '2025-09-30 23:05:53.235323', 'step': 3182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.302467', 'step': 3182, 'epoch': 2} {'type': 'loss', 'content': 0.007287607993930578, 'timestamp': '2025-09-30 23:05:53.305404', 'step': 3183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.365607', 'step': 3183, 'epoch': 2} {'type': 'loss', 'content': 0.07201679050922394, 'timestamp': '2025-09-30 23:05:53.371554', 'step': 3184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.427568', 'step': 3184, 'epoch': 2} {'type': 'loss', 'content': 0.014154067263007164, 'timestamp': '2025-09-30 23:05:53.430393', 'step': 3185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:53.485656', 'step': 3185, 'epoch': 2} {'type': 'loss', 'content': 0.03193763643503189, 'timestamp': '2025-09-30 23:05:53.488916', 'step': 3186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.544361', 'step': 3186, 'epoch': 2} {'type': 'loss', 'content': 0.061934102326631546, 'timestamp': '2025-09-30 23:05:53.547640', 'step': 3187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.601534', 'step': 3187, 'epoch': 2} {'type': 'loss', 'content': 0.0030697982292622328, 'timestamp': '2025-09-30 23:05:53.607524', 'step': 3188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.660257', 'step': 3188, 'epoch': 2} {'type': 'loss', 'content': 0.0007566204876638949, 'timestamp': '2025-09-30 23:05:53.664519', 'step': 3189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.720174', 'step': 3189, 'epoch': 2} {'type': 'loss', 'content': 0.02713628113269806, 'timestamp': '2025-09-30 23:05:53.723253', 'step': 3190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.778630', 'step': 3190, 'epoch': 2} {'type': 'loss', 'content': 0.016344929113984108, 'timestamp': '2025-09-30 23:05:53.785072', 'step': 3191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:53.848647', 'step': 3191, 'epoch': 2} {'type': 'loss', 'content': 0.006919470150023699, 'timestamp': '2025-09-30 23:05:53.856003', 'step': 3192, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:05:57.978896', 'step': 3192, 'epoch': 2} {'type': 'pplx', 'content': 7704172.998226225, 'timestamp': '2025-09-30 23:05:57.983376', 'step': 3192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.040837', 'step': 3192, 'epoch': 2} {'type': 'loss', 'content': 0.007795120123773813, 'timestamp': '2025-09-30 23:05:58.046720', 'step': 3193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.114052', 'step': 3193, 'epoch': 2} {'type': 'loss', 'content': 0.055857859551906586, 'timestamp': '2025-09-30 23:05:58.120042', 'step': 3194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.182806', 'step': 3194, 'epoch': 2} {'type': 'loss', 'content': 0.0009025979088619351, 'timestamp': '2025-09-30 23:05:58.191337', 'step': 3195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.264044', 'step': 3195, 'epoch': 2} {'type': 'loss', 'content': 0.021297914907336235, 'timestamp': '2025-09-30 23:05:58.274503', 'step': 3196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.349935', 'step': 3196, 'epoch': 2} {'type': 'loss', 'content': 0.005064324475824833, 'timestamp': '2025-09-30 23:05:58.354828', 'step': 3197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.415524', 'step': 3197, 'epoch': 2} {'type': 'loss', 'content': 0.01145821064710617, 'timestamp': '2025-09-30 23:05:58.422205', 'step': 3198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:58.490914', 'step': 3198, 'epoch': 2} {'type': 'loss', 'content': 0.03247365355491638, 'timestamp': '2025-09-30 23:05:58.498414', 'step': 3199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.568867', 'step': 3199, 'epoch': 2} {'type': 'loss', 'content': 0.0388854444026947, 'timestamp': '2025-09-30 23:05:58.575503', 'step': 3200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.639106', 'step': 3200, 'epoch': 2} {'type': 'loss', 'content': 0.010790661908686161, 'timestamp': '2025-09-30 23:05:58.647008', 'step': 3201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.713648', 'step': 3201, 'epoch': 2} {'type': 'loss', 'content': 0.011963089928030968, 'timestamp': '2025-09-30 23:05:58.719718', 'step': 3202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.779616', 'step': 3202, 'epoch': 2} {'type': 'loss', 'content': 0.00896822102367878, 'timestamp': '2025-09-30 23:05:58.784680', 'step': 3203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.848629', 'step': 3203, 'epoch': 2} {'type': 'loss', 'content': 0.011938979849219322, 'timestamp': '2025-09-30 23:05:58.859692', 'step': 3204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.928954', 'step': 3204, 'epoch': 2} {'type': 'loss', 'content': 0.006306201219558716, 'timestamp': '2025-09-30 23:05:58.932017', 'step': 3205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:58.997577', 'step': 3205, 'epoch': 2} {'type': 'loss', 'content': 0.02083345130085945, 'timestamp': '2025-09-30 23:05:59.003780', 'step': 3206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.066269', 'step': 3206, 'epoch': 2} {'type': 'loss', 'content': 0.001322788535617292, 'timestamp': '2025-09-30 23:05:59.076193', 'step': 3207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.147792', 'step': 3207, 'epoch': 2} {'type': 'loss', 'content': 0.03455904871225357, 'timestamp': '2025-09-30 23:05:59.157018', 'step': 3208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.215480', 'step': 3208, 'epoch': 2} {'type': 'loss', 'content': 0.01550264935940504, 'timestamp': '2025-09-30 23:05:59.221506', 'step': 3209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.293038', 'step': 3209, 'epoch': 2} {'type': 'loss', 'content': 0.027635307982563972, 'timestamp': '2025-09-30 23:05:59.298434', 'step': 3210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.369462', 'step': 3210, 'epoch': 2} {'type': 'loss', 'content': 0.002096894197165966, 'timestamp': '2025-09-30 23:05:59.375278', 'step': 3211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.444366', 'step': 3211, 'epoch': 2} {'type': 'loss', 'content': 0.005694984924048185, 'timestamp': '2025-09-30 23:05:59.453242', 'step': 3212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.520532', 'step': 3212, 'epoch': 2} {'type': 'loss', 'content': 0.002915899036452174, 'timestamp': '2025-09-30 23:05:59.529321', 'step': 3213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:05:59.592222', 'step': 3213, 'epoch': 2} {'type': 'loss', 'content': 0.010388299822807312, 'timestamp': '2025-09-30 23:05:59.598764', 'step': 3214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.665074', 'step': 3214, 'epoch': 2} {'type': 'loss', 'content': 0.09994705766439438, 'timestamp': '2025-09-30 23:05:59.670344', 'step': 3215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.739115', 'step': 3215, 'epoch': 2} {'type': 'loss', 'content': 0.044548116624355316, 'timestamp': '2025-09-30 23:05:59.752404', 'step': 3216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.833450', 'step': 3216, 'epoch': 2} {'type': 'loss', 'content': 0.021616889163851738, 'timestamp': '2025-09-30 23:05:59.843501', 'step': 3217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:05:59.919123', 'step': 3217, 'epoch': 2} {'type': 'loss', 'content': 0.026607507839798927, 'timestamp': '2025-09-30 23:05:59.929326', 'step': 3218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:05:59.993856', 'step': 3218, 'epoch': 2} {'type': 'loss', 'content': 0.01247410662472248, 'timestamp': '2025-09-30 23:05:59.999406', 'step': 3219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.059067', 'step': 3219, 'epoch': 2} {'type': 'loss', 'content': 0.0013874524738639593, 'timestamp': '2025-09-30 23:06:00.066421', 'step': 3220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.124620', 'step': 3220, 'epoch': 2} {'type': 'loss', 'content': 0.003401955123990774, 'timestamp': '2025-09-30 23:06:00.140907', 'step': 3221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.201483', 'step': 3221, 'epoch': 2} {'type': 'loss', 'content': 0.016344284638762474, 'timestamp': '2025-09-30 23:06:00.208894', 'step': 3222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.275216', 'step': 3222, 'epoch': 2} {'type': 'loss', 'content': 0.018215617164969444, 'timestamp': '2025-09-30 23:06:00.278249', 'step': 3223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.335851', 'step': 3223, 'epoch': 2} {'type': 'loss', 'content': 0.023593930527567863, 'timestamp': '2025-09-30 23:06:00.342475', 'step': 3224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.395690', 'step': 3224, 'epoch': 2} {'type': 'loss', 'content': 0.022215891629457474, 'timestamp': '2025-09-30 23:06:00.398253', 'step': 3225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.452391', 'step': 3225, 'epoch': 2} {'type': 'loss', 'content': 0.003087508725002408, 'timestamp': '2025-09-30 23:06:00.454925', 'step': 3226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.510801', 'step': 3226, 'epoch': 2} {'type': 'loss', 'content': 0.04250779375433922, 'timestamp': '2025-09-30 23:06:00.513320', 'step': 3227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:00.567405', 'step': 3227, 'epoch': 2} {'type': 'loss', 'content': 0.03273626044392586, 'timestamp': '2025-09-30 23:06:00.574280', 'step': 3228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:00.628101', 'step': 3228, 'epoch': 2} {'type': 'loss', 'content': 0.015987848863005638, 'timestamp': '2025-09-30 23:06:00.630705', 'step': 3229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.691245', 'step': 3229, 'epoch': 2} {'type': 'loss', 'content': 0.03344235569238663, 'timestamp': '2025-09-30 23:06:00.696601', 'step': 3230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.764033', 'step': 3230, 'epoch': 2} {'type': 'loss', 'content': 0.006118643097579479, 'timestamp': '2025-09-30 23:06:00.767659', 'step': 3231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.825326', 'step': 3231, 'epoch': 2} {'type': 'loss', 'content': 0.004326509777456522, 'timestamp': '2025-09-30 23:06:00.833180', 'step': 3232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:00.887695', 'step': 3232, 'epoch': 2} {'type': 'loss', 'content': 0.005738940089941025, 'timestamp': '2025-09-30 23:06:00.890877', 'step': 3233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:00.948247', 'step': 3233, 'epoch': 2} {'type': 'loss', 'content': 0.037312205880880356, 'timestamp': '2025-09-30 23:06:00.951021', 'step': 3234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:01.008209', 'step': 3234, 'epoch': 2} {'type': 'loss', 'content': 0.010468153282999992, 'timestamp': '2025-09-30 23:06:01.010793', 'step': 3235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.068563', 'step': 3235, 'epoch': 2} {'type': 'loss', 'content': 0.006533832289278507, 'timestamp': '2025-09-30 23:06:01.074570', 'step': 3236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.127560', 'step': 3236, 'epoch': 2} {'type': 'loss', 'content': 0.002077788580209017, 'timestamp': '2025-09-30 23:06:01.143050', 'step': 3237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.196289', 'step': 3237, 'epoch': 2} {'type': 'loss', 'content': 0.0012913121609017253, 'timestamp': '2025-09-30 23:06:01.201199', 'step': 3238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:01.258433', 'step': 3238, 'epoch': 2} {'type': 'loss', 'content': 0.019479839131236076, 'timestamp': '2025-09-30 23:06:01.263768', 'step': 3239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.330301', 'step': 3239, 'epoch': 2} {'type': 'loss', 'content': 0.028049839660525322, 'timestamp': '2025-09-30 23:06:01.336629', 'step': 3240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.399970', 'step': 3240, 'epoch': 2} {'type': 'loss', 'content': 0.00925076100975275, 'timestamp': '2025-09-30 23:06:01.404113', 'step': 3241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:01.464864', 'step': 3241, 'epoch': 2} {'type': 'loss', 'content': 0.02562188170850277, 'timestamp': '2025-09-30 23:06:01.468565', 'step': 3242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.525583', 'step': 3242, 'epoch': 2} {'type': 'loss', 'content': 0.008742175064980984, 'timestamp': '2025-09-30 23:06:01.529357', 'step': 3243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.584881', 'step': 3243, 'epoch': 2} {'type': 'loss', 'content': 0.07185665518045425, 'timestamp': '2025-09-30 23:06:01.591003', 'step': 3244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.646947', 'step': 3244, 'epoch': 2} {'type': 'loss', 'content': 0.030496561899781227, 'timestamp': '2025-09-30 23:06:01.649550', 'step': 3245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.702904', 'step': 3245, 'epoch': 2} {'type': 'loss', 'content': 0.004534533713012934, 'timestamp': '2025-09-30 23:06:01.705114', 'step': 3246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.758641', 'step': 3246, 'epoch': 2} {'type': 'loss', 'content': 0.005604031961411238, 'timestamp': '2025-09-30 23:06:01.761242', 'step': 3247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.814903', 'step': 3247, 'epoch': 2} {'type': 'loss', 'content': 0.01550121046602726, 'timestamp': '2025-09-30 23:06:01.824457', 'step': 3248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.878434', 'step': 3248, 'epoch': 2} {'type': 'loss', 'content': 0.012044670060276985, 'timestamp': '2025-09-30 23:06:01.885190', 'step': 3249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:01.949951', 'step': 3249, 'epoch': 2} {'type': 'loss', 'content': 0.00621923478320241, 'timestamp': '2025-09-30 23:06:01.955994', 'step': 3250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.023258', 'step': 3250, 'epoch': 2} {'type': 'loss', 'content': 0.0023737847805023193, 'timestamp': '2025-09-30 23:06:02.026307', 'step': 3251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:02.090861', 'step': 3251, 'epoch': 2} {'type': 'loss', 'content': 0.010181472636759281, 'timestamp': '2025-09-30 23:06:02.097911', 'step': 3252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.153682', 'step': 3252, 'epoch': 2} {'type': 'loss', 'content': 0.031606413424015045, 'timestamp': '2025-09-30 23:06:02.156708', 'step': 3253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.213257', 'step': 3253, 'epoch': 2} {'type': 'loss', 'content': 0.008995710872113705, 'timestamp': '2025-09-30 23:06:02.220239', 'step': 3254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.285521', 'step': 3254, 'epoch': 2} {'type': 'loss', 'content': 0.005825829692184925, 'timestamp': '2025-09-30 23:06:02.288006', 'step': 3255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.341719', 'step': 3255, 'epoch': 2} {'type': 'loss', 'content': 0.007276550866663456, 'timestamp': '2025-09-30 23:06:02.347645', 'step': 3256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.400816', 'step': 3256, 'epoch': 2} {'type': 'loss', 'content': 0.0161599013954401, 'timestamp': '2025-09-30 23:06:02.403647', 'step': 3257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:02.457832', 'step': 3257, 'epoch': 2} {'type': 'loss', 'content': 0.02396126464009285, 'timestamp': '2025-09-30 23:06:02.462760', 'step': 3258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.527375', 'step': 3258, 'epoch': 2} {'type': 'loss', 'content': 0.000666647800244391, 'timestamp': '2025-09-30 23:06:02.530055', 'step': 3259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.593714', 'step': 3259, 'epoch': 2} {'type': 'loss', 'content': 0.012312081642448902, 'timestamp': '2025-09-30 23:06:02.600860', 'step': 3260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.656900', 'step': 3260, 'epoch': 2} {'type': 'loss', 'content': 0.022731516510248184, 'timestamp': '2025-09-30 23:06:02.660098', 'step': 3261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:02.714808', 'step': 3261, 'epoch': 2} {'type': 'loss', 'content': 0.019761692732572556, 'timestamp': '2025-09-30 23:06:02.717618', 'step': 3262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.772524', 'step': 3262, 'epoch': 2} {'type': 'loss', 'content': 0.0042115733958780766, 'timestamp': '2025-09-30 23:06:02.776053', 'step': 3263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:02.836048', 'step': 3263, 'epoch': 2} {'type': 'loss', 'content': 0.005428736563771963, 'timestamp': '2025-09-30 23:06:02.842441', 'step': 3264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:02.901646', 'step': 3264, 'epoch': 2} {'type': 'loss', 'content': 0.03378496691584587, 'timestamp': '2025-09-30 23:06:02.904186', 'step': 3265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:02.958060', 'step': 3265, 'epoch': 2} {'type': 'loss', 'content': 0.03747677430510521, 'timestamp': '2025-09-30 23:06:02.961127', 'step': 3266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.018395', 'step': 3266, 'epoch': 2} {'type': 'loss', 'content': 0.02964751422405243, 'timestamp': '2025-09-30 23:06:03.023742', 'step': 3267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:03.087772', 'step': 3267, 'epoch': 2} {'type': 'loss', 'content': 0.013275397010147572, 'timestamp': '2025-09-30 23:06:03.096686', 'step': 3268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.161143', 'step': 3268, 'epoch': 2} {'type': 'loss', 'content': 0.05942004173994064, 'timestamp': '2025-09-30 23:06:03.164848', 'step': 3269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.220009', 'step': 3269, 'epoch': 2} {'type': 'loss', 'content': 0.050600316375494, 'timestamp': '2025-09-30 23:06:03.223521', 'step': 3270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.281020', 'step': 3270, 'epoch': 2} {'type': 'loss', 'content': 0.019342126324772835, 'timestamp': '2025-09-30 23:06:03.285097', 'step': 3271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.347738', 'step': 3271, 'epoch': 2} {'type': 'loss', 'content': 0.016618134453892708, 'timestamp': '2025-09-30 23:06:03.360688', 'step': 3272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.423300', 'step': 3272, 'epoch': 2} {'type': 'loss', 'content': 0.0310928076505661, 'timestamp': '2025-09-30 23:06:03.425851', 'step': 3273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.479414', 'step': 3273, 'epoch': 2} {'type': 'loss', 'content': 0.014707602560520172, 'timestamp': '2025-09-30 23:06:03.482952', 'step': 3274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:03.536219', 'step': 3274, 'epoch': 2} {'type': 'loss', 'content': 0.04199080541729927, 'timestamp': '2025-09-30 23:06:03.538593', 'step': 3275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.591217', 'step': 3275, 'epoch': 2} {'type': 'loss', 'content': 0.010937763378024101, 'timestamp': '2025-09-30 23:06:03.597071', 'step': 3276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.650389', 'step': 3276, 'epoch': 2} {'type': 'loss', 'content': 0.028903910890221596, 'timestamp': '2025-09-30 23:06:03.652753', 'step': 3277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.705619', 'step': 3277, 'epoch': 2} {'type': 'loss', 'content': 0.0020439368672668934, 'timestamp': '2025-09-30 23:06:03.707882', 'step': 3278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.760291', 'step': 3278, 'epoch': 2} {'type': 'loss', 'content': 0.004660067614167929, 'timestamp': '2025-09-30 23:06:03.762509', 'step': 3279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.816064', 'step': 3279, 'epoch': 2} {'type': 'loss', 'content': 0.006544176954776049, 'timestamp': '2025-09-30 23:06:03.822057', 'step': 3280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:06:03.874713', 'step': 3280, 'epoch': 2} {'type': 'loss', 'content': 0.0065456959418952465, 'timestamp': '2025-09-30 23:06:03.876924', 'step': 3281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.931575', 'step': 3281, 'epoch': 2} {'type': 'loss', 'content': 0.003240781370550394, 'timestamp': '2025-09-30 23:06:03.933816', 'step': 3282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:03.986865', 'step': 3282, 'epoch': 2} {'type': 'loss', 'content': 0.023570476099848747, 'timestamp': '2025-09-30 23:06:03.989140', 'step': 3283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.042703', 'step': 3283, 'epoch': 2} {'type': 'loss', 'content': 0.002929928246885538, 'timestamp': '2025-09-30 23:06:04.048419', 'step': 3284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.100848', 'step': 3284, 'epoch': 2} {'type': 'loss', 'content': 0.01101705152541399, 'timestamp': '2025-09-30 23:06:04.103240', 'step': 3285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.158269', 'step': 3285, 'epoch': 2} {'type': 'loss', 'content': 0.017364298924803734, 'timestamp': '2025-09-30 23:06:04.166862', 'step': 3286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.230349', 'step': 3286, 'epoch': 2} {'type': 'loss', 'content': 0.0022047071252018213, 'timestamp': '2025-09-30 23:06:04.233308', 'step': 3287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.286461', 'step': 3287, 'epoch': 2} {'type': 'loss', 'content': 0.007194988429546356, 'timestamp': '2025-09-30 23:06:04.292626', 'step': 3288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:04.346321', 'step': 3288, 'epoch': 2} {'type': 'loss', 'content': 0.018691886216402054, 'timestamp': '2025-09-30 23:06:04.348599', 'step': 3289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.401633', 'step': 3289, 'epoch': 2} {'type': 'loss', 'content': 0.003033750457689166, 'timestamp': '2025-09-30 23:06:04.403824', 'step': 3290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.458732', 'step': 3290, 'epoch': 2} {'type': 'loss', 'content': 0.018006494268774986, 'timestamp': '2025-09-30 23:06:04.461122', 'step': 3291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.513933', 'step': 3291, 'epoch': 2} {'type': 'loss', 'content': 0.0032964737620204687, 'timestamp': '2025-09-30 23:06:04.520114', 'step': 3292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:04.572736', 'step': 3292, 'epoch': 2} {'type': 'loss', 'content': 0.015140037052333355, 'timestamp': '2025-09-30 23:06:04.575309', 'step': 3293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.628397', 'step': 3293, 'epoch': 2} {'type': 'loss', 'content': 0.04675699397921562, 'timestamp': '2025-09-30 23:06:04.630704', 'step': 3294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.688332', 'step': 3294, 'epoch': 2} {'type': 'loss', 'content': 0.05239267274737358, 'timestamp': '2025-09-30 23:06:04.692106', 'step': 3295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:04.750046', 'step': 3295, 'epoch': 2} {'type': 'loss', 'content': 0.03879278898239136, 'timestamp': '2025-09-30 23:06:04.756229', 'step': 3296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.809098', 'step': 3296, 'epoch': 2} {'type': 'loss', 'content': 0.014537500217556953, 'timestamp': '2025-09-30 23:06:04.811579', 'step': 3297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.867931', 'step': 3297, 'epoch': 2} {'type': 'loss', 'content': 0.007120892405509949, 'timestamp': '2025-09-30 23:06:04.871020', 'step': 3298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:04.926738', 'step': 3298, 'epoch': 2} {'type': 'loss', 'content': 0.0309650469571352, 'timestamp': '2025-09-30 23:06:04.929533', 'step': 3299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:04.983969', 'step': 3299, 'epoch': 2} {'type': 'loss', 'content': 0.009657798334956169, 'timestamp': '2025-09-30 23:06:04.989946', 'step': 3300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.042674', 'step': 3300, 'epoch': 2} {'type': 'loss', 'content': 0.03734425827860832, 'timestamp': '2025-09-30 23:06:05.045192', 'step': 3301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.098321', 'step': 3301, 'epoch': 2} {'type': 'loss', 'content': 0.01968427561223507, 'timestamp': '2025-09-30 23:06:05.101103', 'step': 3302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.154688', 'step': 3302, 'epoch': 2} {'type': 'loss', 'content': 0.012018097564578056, 'timestamp': '2025-09-30 23:06:05.157348', 'step': 3303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.210868', 'step': 3303, 'epoch': 2} {'type': 'loss', 'content': 0.011224695481359959, 'timestamp': '2025-09-30 23:06:05.216698', 'step': 3304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.270256', 'step': 3304, 'epoch': 2} {'type': 'loss', 'content': 0.043597813695669174, 'timestamp': '2025-09-30 23:06:05.273624', 'step': 3305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.326222', 'step': 3305, 'epoch': 2} {'type': 'loss', 'content': 0.025288721546530724, 'timestamp': '2025-09-30 23:06:05.328816', 'step': 3306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.384206', 'step': 3306, 'epoch': 2} {'type': 'loss', 'content': 0.0065134731121361256, 'timestamp': '2025-09-30 23:06:05.386718', 'step': 3307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.439599', 'step': 3307, 'epoch': 2} {'type': 'loss', 'content': 0.0069490219466388226, 'timestamp': '2025-09-30 23:06:05.445597', 'step': 3308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:05.497965', 'step': 3308, 'epoch': 2} {'type': 'loss', 'content': 0.04584478586912155, 'timestamp': '2025-09-30 23:06:05.500563', 'step': 3309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.554969', 'step': 3309, 'epoch': 2} {'type': 'loss', 'content': 0.01918674074113369, 'timestamp': '2025-09-30 23:06:05.557652', 'step': 3310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.611470', 'step': 3310, 'epoch': 2} {'type': 'loss', 'content': 0.020749123767018318, 'timestamp': '2025-09-30 23:06:05.614023', 'step': 3311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.667750', 'step': 3311, 'epoch': 2} {'type': 'loss', 'content': 0.0046030208468437195, 'timestamp': '2025-09-30 23:06:05.673489', 'step': 3312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:05.725834', 'step': 3312, 'epoch': 2} {'type': 'loss', 'content': 0.053906504064798355, 'timestamp': '2025-09-30 23:06:05.732372', 'step': 3313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.787604', 'step': 3313, 'epoch': 2} {'type': 'loss', 'content': 0.007899750955402851, 'timestamp': '2025-09-30 23:06:05.789896', 'step': 3314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.842681', 'step': 3314, 'epoch': 2} {'type': 'loss', 'content': 0.013711373321712017, 'timestamp': '2025-09-30 23:06:05.845129', 'step': 3315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.898893', 'step': 3315, 'epoch': 2} {'type': 'loss', 'content': 0.033397067338228226, 'timestamp': '2025-09-30 23:06:05.904767', 'step': 3316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:05.957704', 'step': 3316, 'epoch': 2} {'type': 'loss', 'content': 0.01239831279963255, 'timestamp': '2025-09-30 23:06:05.960173', 'step': 3317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.013384', 'step': 3317, 'epoch': 2} {'type': 'loss', 'content': 0.00502910977229476, 'timestamp': '2025-09-30 23:06:06.015802', 'step': 3318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.068605', 'step': 3318, 'epoch': 2} {'type': 'loss', 'content': 0.0036339436192065477, 'timestamp': '2025-09-30 23:06:06.070911', 'step': 3319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.124141', 'step': 3319, 'epoch': 2} {'type': 'loss', 'content': 0.003305143443867564, 'timestamp': '2025-09-30 23:06:06.130026', 'step': 3320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.187355', 'step': 3320, 'epoch': 2} {'type': 'loss', 'content': 0.004306564573198557, 'timestamp': '2025-09-30 23:06:06.189837', 'step': 3321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.243137', 'step': 3321, 'epoch': 2} {'type': 'loss', 'content': 0.016047583892941475, 'timestamp': '2025-09-30 23:06:06.245809', 'step': 3322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.299271', 'step': 3322, 'epoch': 2} {'type': 'loss', 'content': 0.015376859344542027, 'timestamp': '2025-09-30 23:06:06.301648', 'step': 3323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.360964', 'step': 3323, 'epoch': 2} {'type': 'loss', 'content': 0.05093502998352051, 'timestamp': '2025-09-30 23:06:06.367059', 'step': 3324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.422872', 'step': 3324, 'epoch': 2} {'type': 'loss', 'content': 0.06558900326490402, 'timestamp': '2025-09-30 23:06:06.425467', 'step': 3325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.479099', 'step': 3325, 'epoch': 2} {'type': 'loss', 'content': 0.0022440163884311914, 'timestamp': '2025-09-30 23:06:06.482435', 'step': 3326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 23:06:06.539774', 'step': 3326, 'epoch': 2} {'type': 'loss', 'content': 0.021154696121811867, 'timestamp': '2025-09-30 23:06:06.542492', 'step': 3327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.601562', 'step': 3327, 'epoch': 2} {'type': 'loss', 'content': 0.0013194283237680793, 'timestamp': '2025-09-30 23:06:06.608329', 'step': 3328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.663799', 'step': 3328, 'epoch': 2} {'type': 'loss', 'content': 0.01085311733186245, 'timestamp': '2025-09-30 23:06:06.666678', 'step': 3329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.721889', 'step': 3329, 'epoch': 2} {'type': 'loss', 'content': 0.061902593821287155, 'timestamp': '2025-09-30 23:06:06.724824', 'step': 3330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.780885', 'step': 3330, 'epoch': 2} {'type': 'loss', 'content': 0.06641173362731934, 'timestamp': '2025-09-30 23:06:06.794089', 'step': 3331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.857199', 'step': 3331, 'epoch': 2} {'type': 'loss', 'content': 0.019711146131157875, 'timestamp': '2025-09-30 23:06:06.864797', 'step': 3332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:06.917990', 'step': 3332, 'epoch': 2} {'type': 'loss', 'content': 0.010299828834831715, 'timestamp': '2025-09-30 23:06:06.921938', 'step': 3333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:06.979673', 'step': 3333, 'epoch': 2} {'type': 'loss', 'content': 0.05137210339307785, 'timestamp': '2025-09-30 23:06:06.982627', 'step': 3334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.039114', 'step': 3334, 'epoch': 2} {'type': 'loss', 'content': 0.06547261774539948, 'timestamp': '2025-09-30 23:06:07.046927', 'step': 3335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.110961', 'step': 3335, 'epoch': 2} {'type': 'loss', 'content': 0.019023984670639038, 'timestamp': '2025-09-30 23:06:07.117478', 'step': 3336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.174841', 'step': 3336, 'epoch': 2} {'type': 'loss', 'content': 0.004744172096252441, 'timestamp': '2025-09-30 23:06:07.180049', 'step': 3337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.235420', 'step': 3337, 'epoch': 2} {'type': 'loss', 'content': 0.02238360606133938, 'timestamp': '2025-09-30 23:06:07.237660', 'step': 3338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.292280', 'step': 3338, 'epoch': 2} {'type': 'loss', 'content': 0.005154428072273731, 'timestamp': '2025-09-30 23:06:07.295170', 'step': 3339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.349546', 'step': 3339, 'epoch': 2} {'type': 'loss', 'content': 0.011214182712137699, 'timestamp': '2025-09-30 23:06:07.359690', 'step': 3340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.419055', 'step': 3340, 'epoch': 2} {'type': 'loss', 'content': 0.004133254289627075, 'timestamp': '2025-09-30 23:06:07.423214', 'step': 3341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.478250', 'step': 3341, 'epoch': 2} {'type': 'loss', 'content': 0.019281258806586266, 'timestamp': '2025-09-30 23:06:07.480628', 'step': 3342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.537313', 'step': 3342, 'epoch': 2} {'type': 'loss', 'content': 0.030530763790011406, 'timestamp': '2025-09-30 23:06:07.539614', 'step': 3343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:07.592500', 'step': 3343, 'epoch': 2} {'type': 'loss', 'content': 0.002986304461956024, 'timestamp': '2025-09-30 23:06:07.598433', 'step': 3344, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:06:11.049281', 'step': 3344, 'epoch': 2} {'type': 'pplx', 'content': 7425755.578202538, 'timestamp': '2025-09-30 23:06:11.051858', 'step': 3344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.104350', 'step': 3344, 'epoch': 2} {'type': 'loss', 'content': 0.030339622870087624, 'timestamp': '2025-09-30 23:06:11.106690', 'step': 3345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.161794', 'step': 3345, 'epoch': 2} {'type': 'loss', 'content': 0.015967193990945816, 'timestamp': '2025-09-30 23:06:11.164081', 'step': 3346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:11.217024', 'step': 3346, 'epoch': 2} {'type': 'loss', 'content': 0.012655665166676044, 'timestamp': '2025-09-30 23:06:11.219646', 'step': 3347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.273512', 'step': 3347, 'epoch': 2} {'type': 'loss', 'content': 0.010176638141274452, 'timestamp': '2025-09-30 23:06:11.280007', 'step': 3348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.334654', 'step': 3348, 'epoch': 2} {'type': 'loss', 'content': 0.023158391937613487, 'timestamp': '2025-09-30 23:06:11.337062', 'step': 3349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.390446', 'step': 3349, 'epoch': 2} {'type': 'loss', 'content': 0.02025497332215309, 'timestamp': '2025-09-30 23:06:11.392950', 'step': 3350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:11.446022', 'step': 3350, 'epoch': 2} {'type': 'loss', 'content': 0.021919040009379387, 'timestamp': '2025-09-30 23:06:11.448621', 'step': 3351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:11.501698', 'step': 3351, 'epoch': 2} {'type': 'loss', 'content': 0.017099330201745033, 'timestamp': '2025-09-30 23:06:11.507691', 'step': 3352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.560865', 'step': 3352, 'epoch': 2} {'type': 'loss', 'content': 0.009068694896996021, 'timestamp': '2025-09-30 23:06:11.563213', 'step': 3353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.615868', 'step': 3353, 'epoch': 2} {'type': 'loss', 'content': 0.010250935330986977, 'timestamp': '2025-09-30 23:06:11.618661', 'step': 3354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.671859', 'step': 3354, 'epoch': 2} {'type': 'loss', 'content': 0.0016304508317261934, 'timestamp': '2025-09-30 23:06:11.674199', 'step': 3355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.727005', 'step': 3355, 'epoch': 2} {'type': 'loss', 'content': 0.026032986119389534, 'timestamp': '2025-09-30 23:06:11.733147', 'step': 3356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.786151', 'step': 3356, 'epoch': 2} {'type': 'loss', 'content': 0.04938999563455582, 'timestamp': '2025-09-30 23:06:11.788899', 'step': 3357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.841900', 'step': 3357, 'epoch': 2} {'type': 'loss', 'content': 0.005775019526481628, 'timestamp': '2025-09-30 23:06:11.844341', 'step': 3358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:11.898918', 'step': 3358, 'epoch': 2} {'type': 'loss', 'content': 0.022316500544548035, 'timestamp': '2025-09-30 23:06:11.901104', 'step': 3359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:11.954895', 'step': 3359, 'epoch': 2} {'type': 'loss', 'content': 0.009821304120123386, 'timestamp': '2025-09-30 23:06:11.960890', 'step': 3360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:12.015682', 'step': 3360, 'epoch': 2} {'type': 'loss', 'content': 0.020874667912721634, 'timestamp': '2025-09-30 23:06:12.018231', 'step': 3361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.072418', 'step': 3361, 'epoch': 2} {'type': 'loss', 'content': 0.005753648467361927, 'timestamp': '2025-09-30 23:06:12.075377', 'step': 3362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.128192', 'step': 3362, 'epoch': 2} {'type': 'loss', 'content': 0.0077017066068947315, 'timestamp': '2025-09-30 23:06:12.131112', 'step': 3363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.184195', 'step': 3363, 'epoch': 2} {'type': 'loss', 'content': 0.037911515682935715, 'timestamp': '2025-09-30 23:06:12.190120', 'step': 3364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.241996', 'step': 3364, 'epoch': 2} {'type': 'loss', 'content': 0.030184252187609673, 'timestamp': '2025-09-30 23:06:12.246929', 'step': 3365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.299657', 'step': 3365, 'epoch': 2} {'type': 'loss', 'content': 0.053903814405202866, 'timestamp': '2025-09-30 23:06:12.302866', 'step': 3366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.355295', 'step': 3366, 'epoch': 2} {'type': 'loss', 'content': 0.002002858091145754, 'timestamp': '2025-09-30 23:06:12.357572', 'step': 3367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.411790', 'step': 3367, 'epoch': 2} {'type': 'loss', 'content': 0.03699963167309761, 'timestamp': '2025-09-30 23:06:12.418732', 'step': 3368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.470968', 'step': 3368, 'epoch': 2} {'type': 'loss', 'content': 0.011355995200574398, 'timestamp': '2025-09-30 23:06:12.473439', 'step': 3369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.526446', 'step': 3369, 'epoch': 2} {'type': 'loss', 'content': 0.015882233157753944, 'timestamp': '2025-09-30 23:06:12.528689', 'step': 3370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.581297', 'step': 3370, 'epoch': 2} {'type': 'loss', 'content': 0.03476978838443756, 'timestamp': '2025-09-30 23:06:12.583562', 'step': 3371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.642730', 'step': 3371, 'epoch': 2} {'type': 'loss', 'content': 0.024638721719384193, 'timestamp': '2025-09-30 23:06:12.648659', 'step': 3372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.701528', 'step': 3372, 'epoch': 2} {'type': 'loss', 'content': 0.026135792955756187, 'timestamp': '2025-09-30 23:06:12.703942', 'step': 3373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.758307', 'step': 3373, 'epoch': 2} {'type': 'loss', 'content': 0.038027770817279816, 'timestamp': '2025-09-30 23:06:12.760607', 'step': 3374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.813185', 'step': 3374, 'epoch': 2} {'type': 'loss', 'content': 0.021302219480276108, 'timestamp': '2025-09-30 23:06:12.815755', 'step': 3375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.870311', 'step': 3375, 'epoch': 2} {'type': 'loss', 'content': 0.01771019585430622, 'timestamp': '2025-09-30 23:06:12.876068', 'step': 3376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.927949', 'step': 3376, 'epoch': 2} {'type': 'loss', 'content': 0.012803698889911175, 'timestamp': '2025-09-30 23:06:12.930729', 'step': 3377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:12.984566', 'step': 3377, 'epoch': 2} {'type': 'loss', 'content': 0.009149691089987755, 'timestamp': '2025-09-30 23:06:12.987208', 'step': 3378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.041764', 'step': 3378, 'epoch': 2} {'type': 'loss', 'content': 0.037048134952783585, 'timestamp': '2025-09-30 23:06:13.044780', 'step': 3379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.097470', 'step': 3379, 'epoch': 2} {'type': 'loss', 'content': 0.008354992605745792, 'timestamp': '2025-09-30 23:06:13.103046', 'step': 3380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.155184', 'step': 3380, 'epoch': 2} {'type': 'loss', 'content': 0.010191974230110645, 'timestamp': '2025-09-30 23:06:13.157532', 'step': 3381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:13.210763', 'step': 3381, 'epoch': 2} {'type': 'loss', 'content': 0.009321297518908978, 'timestamp': '2025-09-30 23:06:13.213267', 'step': 3382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:13.266432', 'step': 3382, 'epoch': 2} {'type': 'loss', 'content': 0.027109581977128983, 'timestamp': '2025-09-30 23:06:13.268875', 'step': 3383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:13.322860', 'step': 3383, 'epoch': 2} {'type': 'loss', 'content': 0.025557544082403183, 'timestamp': '2025-09-30 23:06:13.329642', 'step': 3384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:13.382336', 'step': 3384, 'epoch': 2} {'type': 'loss', 'content': 0.014172360301017761, 'timestamp': '2025-09-30 23:06:13.384969', 'step': 3385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.438881', 'step': 3385, 'epoch': 2} {'type': 'loss', 'content': 0.01294358354061842, 'timestamp': '2025-09-30 23:06:13.441109', 'step': 3386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.505762', 'step': 3386, 'epoch': 2} {'type': 'loss', 'content': 0.045242007821798325, 'timestamp': '2025-09-30 23:06:13.507999', 'step': 3387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:13.560934', 'step': 3387, 'epoch': 2} {'type': 'loss', 'content': 0.031861286610364914, 'timestamp': '2025-09-30 23:06:13.566728', 'step': 3388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.619164', 'step': 3388, 'epoch': 2} {'type': 'loss', 'content': 0.00943608395755291, 'timestamp': '2025-09-30 23:06:13.621543', 'step': 3389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.675192', 'step': 3389, 'epoch': 2} {'type': 'loss', 'content': 0.0056036426685750484, 'timestamp': '2025-09-30 23:06:13.679281', 'step': 3390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.735437', 'step': 3390, 'epoch': 2} {'type': 'loss', 'content': 0.013599516823887825, 'timestamp': '2025-09-30 23:06:13.737778', 'step': 3391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.791760', 'step': 3391, 'epoch': 2} {'type': 'loss', 'content': 0.008537517860531807, 'timestamp': '2025-09-30 23:06:13.798984', 'step': 3392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.851325', 'step': 3392, 'epoch': 2} {'type': 'loss', 'content': 0.021132951602339745, 'timestamp': '2025-09-30 23:06:13.854030', 'step': 3393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.906370', 'step': 3393, 'epoch': 2} {'type': 'loss', 'content': 0.027962923049926758, 'timestamp': '2025-09-30 23:06:13.908638', 'step': 3394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:13.961367', 'step': 3394, 'epoch': 2} {'type': 'loss', 'content': 0.009137154556810856, 'timestamp': '2025-09-30 23:06:13.963620', 'step': 3395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.016198', 'step': 3395, 'epoch': 2} {'type': 'loss', 'content': 0.008811813779175282, 'timestamp': '2025-09-30 23:06:14.021931', 'step': 3396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:06:14.074765', 'step': 3396, 'epoch': 2} {'type': 'loss', 'content': 0.0067888228222727776, 'timestamp': '2025-09-30 23:06:14.076949', 'step': 3397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.129829', 'step': 3397, 'epoch': 2} {'type': 'loss', 'content': 0.02958562597632408, 'timestamp': '2025-09-30 23:06:14.132926', 'step': 3398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.189187', 'step': 3398, 'epoch': 2} {'type': 'loss', 'content': 0.013377780094742775, 'timestamp': '2025-09-30 23:06:14.192158', 'step': 3399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.247355', 'step': 3399, 'epoch': 2} {'type': 'loss', 'content': 0.014820112846791744, 'timestamp': '2025-09-30 23:06:14.253989', 'step': 3400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.311304', 'step': 3400, 'epoch': 2} {'type': 'loss', 'content': 0.052470527589321136, 'timestamp': '2025-09-30 23:06:14.314533', 'step': 3401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.371420', 'step': 3401, 'epoch': 2} {'type': 'loss', 'content': 0.01139987725764513, 'timestamp': '2025-09-30 23:06:14.374998', 'step': 3402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:14.432444', 'step': 3402, 'epoch': 2} {'type': 'loss', 'content': 0.05514490604400635, 'timestamp': '2025-09-30 23:06:14.435645', 'step': 3403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.493425', 'step': 3403, 'epoch': 2} {'type': 'loss', 'content': 0.017259197309613228, 'timestamp': '2025-09-30 23:06:14.500381', 'step': 3404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.556063', 'step': 3404, 'epoch': 2} {'type': 'loss', 'content': 0.01358715258538723, 'timestamp': '2025-09-30 23:06:14.561166', 'step': 3405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.617020', 'step': 3405, 'epoch': 2} {'type': 'loss', 'content': 0.003133968682959676, 'timestamp': '2025-09-30 23:06:14.620534', 'step': 3406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.675295', 'step': 3406, 'epoch': 2} {'type': 'loss', 'content': 0.008449428714811802, 'timestamp': '2025-09-30 23:06:14.678640', 'step': 3407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:14.734756', 'step': 3407, 'epoch': 2} {'type': 'loss', 'content': 0.0014776053139939904, 'timestamp': '2025-09-30 23:06:14.741425', 'step': 3408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.796400', 'step': 3408, 'epoch': 2} {'type': 'loss', 'content': 0.06587426364421844, 'timestamp': '2025-09-30 23:06:14.800087', 'step': 3409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.855239', 'step': 3409, 'epoch': 2} {'type': 'loss', 'content': 0.029008101671934128, 'timestamp': '2025-09-30 23:06:14.858039', 'step': 3410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.914374', 'step': 3410, 'epoch': 2} {'type': 'loss', 'content': 0.0017102726269513369, 'timestamp': '2025-09-30 23:06:14.923053', 'step': 3411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:14.977991', 'step': 3411, 'epoch': 2} {'type': 'loss', 'content': 0.0824623703956604, 'timestamp': '2025-09-30 23:06:14.984922', 'step': 3412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.040184', 'step': 3412, 'epoch': 2} {'type': 'loss', 'content': 0.03412816673517227, 'timestamp': '2025-09-30 23:06:15.043287', 'step': 3413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.100653', 'step': 3413, 'epoch': 2} {'type': 'loss', 'content': 0.021069006994366646, 'timestamp': '2025-09-30 23:06:15.103778', 'step': 3414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:15.158104', 'step': 3414, 'epoch': 2} {'type': 'loss', 'content': 0.0076685575768351555, 'timestamp': '2025-09-30 23:06:15.161171', 'step': 3415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.215586', 'step': 3415, 'epoch': 2} {'type': 'loss', 'content': 0.026331230998039246, 'timestamp': '2025-09-30 23:06:15.222010', 'step': 3416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.275971', 'step': 3416, 'epoch': 2} {'type': 'loss', 'content': 0.002497793873772025, 'timestamp': '2025-09-30 23:06:15.279195', 'step': 3417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.333958', 'step': 3417, 'epoch': 2} {'type': 'loss', 'content': 0.025042565539479256, 'timestamp': '2025-09-30 23:06:15.337200', 'step': 3418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.394013', 'step': 3418, 'epoch': 2} {'type': 'loss', 'content': 0.012081674300134182, 'timestamp': '2025-09-30 23:06:15.397480', 'step': 3419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.452509', 'step': 3419, 'epoch': 2} {'type': 'loss', 'content': 0.010923532769083977, 'timestamp': '2025-09-30 23:06:15.459361', 'step': 3420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.513579', 'step': 3420, 'epoch': 2} {'type': 'loss', 'content': 0.02042435109615326, 'timestamp': '2025-09-30 23:06:15.516067', 'step': 3421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:15.569573', 'step': 3421, 'epoch': 2} {'type': 'loss', 'content': 0.020661788061261177, 'timestamp': '2025-09-30 23:06:15.572076', 'step': 3422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.626115', 'step': 3422, 'epoch': 2} {'type': 'loss', 'content': 0.0025122223887592554, 'timestamp': '2025-09-30 23:06:15.628465', 'step': 3423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.681108', 'step': 3423, 'epoch': 2} {'type': 'loss', 'content': 0.00751533592119813, 'timestamp': '2025-09-30 23:06:15.687353', 'step': 3424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.741434', 'step': 3424, 'epoch': 2} {'type': 'loss', 'content': 0.01881532371044159, 'timestamp': '2025-09-30 23:06:15.743817', 'step': 3425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.796850', 'step': 3425, 'epoch': 2} {'type': 'loss', 'content': 0.044687751680612564, 'timestamp': '2025-09-30 23:06:15.799189', 'step': 3426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:15.852838', 'step': 3426, 'epoch': 2} {'type': 'loss', 'content': 0.0011573395458981395, 'timestamp': '2025-09-30 23:06:15.855380', 'step': 3427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:15.911438', 'step': 3427, 'epoch': 2} {'type': 'loss', 'content': 0.0012286717537790537, 'timestamp': '2025-09-30 23:06:15.917506', 'step': 3428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:15.970209', 'step': 3428, 'epoch': 2} {'type': 'loss', 'content': 0.021096035838127136, 'timestamp': '2025-09-30 23:06:15.972460', 'step': 3429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.025089', 'step': 3429, 'epoch': 2} {'type': 'loss', 'content': 0.01176526490598917, 'timestamp': '2025-09-30 23:06:16.027494', 'step': 3430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.080382', 'step': 3430, 'epoch': 2} {'type': 'loss', 'content': 0.0277940034866333, 'timestamp': '2025-09-30 23:06:16.083723', 'step': 3431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:16.136728', 'step': 3431, 'epoch': 2} {'type': 'loss', 'content': 0.002629377180710435, 'timestamp': '2025-09-30 23:06:16.142532', 'step': 3432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.194145', 'step': 3432, 'epoch': 2} {'type': 'loss', 'content': 0.0625828206539154, 'timestamp': '2025-09-30 23:06:16.196436', 'step': 3433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.248949', 'step': 3433, 'epoch': 2} {'type': 'loss', 'content': 0.0032178214751183987, 'timestamp': '2025-09-30 23:06:16.251685', 'step': 3434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.304374', 'step': 3434, 'epoch': 2} {'type': 'loss', 'content': 0.04387861490249634, 'timestamp': '2025-09-30 23:06:16.306700', 'step': 3435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:16.359428', 'step': 3435, 'epoch': 2} {'type': 'loss', 'content': 0.006879720836877823, 'timestamp': '2025-09-30 23:06:16.365258', 'step': 3436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.417920', 'step': 3436, 'epoch': 2} {'type': 'loss', 'content': 0.02709881402552128, 'timestamp': '2025-09-30 23:06:16.420212', 'step': 3437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.472586', 'step': 3437, 'epoch': 2} {'type': 'loss', 'content': 0.025389304384589195, 'timestamp': '2025-09-30 23:06:16.474857', 'step': 3438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.527747', 'step': 3438, 'epoch': 2} {'type': 'loss', 'content': 0.030005162581801414, 'timestamp': '2025-09-30 23:06:16.530179', 'step': 3439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.582757', 'step': 3439, 'epoch': 2} {'type': 'loss', 'content': 0.009569878689944744, 'timestamp': '2025-09-30 23:06:16.588821', 'step': 3440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.642607', 'step': 3440, 'epoch': 2} {'type': 'loss', 'content': 0.04426365718245506, 'timestamp': '2025-09-30 23:06:16.645175', 'step': 3441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.707869', 'step': 3441, 'epoch': 2} {'type': 'loss', 'content': 0.02050027623772621, 'timestamp': '2025-09-30 23:06:16.710142', 'step': 3442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.762559', 'step': 3442, 'epoch': 2} {'type': 'loss', 'content': 0.004866474773734808, 'timestamp': '2025-09-30 23:06:16.765897', 'step': 3443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.820866', 'step': 3443, 'epoch': 2} {'type': 'loss', 'content': 0.018757512792944908, 'timestamp': '2025-09-30 23:06:16.826790', 'step': 3444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.879023', 'step': 3444, 'epoch': 2} {'type': 'loss', 'content': 0.04354578256607056, 'timestamp': '2025-09-30 23:06:16.881526', 'step': 3445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.934206', 'step': 3445, 'epoch': 2} {'type': 'loss', 'content': 0.024301331490278244, 'timestamp': '2025-09-30 23:06:16.936657', 'step': 3446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:16.989391', 'step': 3446, 'epoch': 2} {'type': 'loss', 'content': 0.02002860978245735, 'timestamp': '2025-09-30 23:06:16.991917', 'step': 3447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.044709', 'step': 3447, 'epoch': 2} {'type': 'loss', 'content': 0.005702354945242405, 'timestamp': '2025-09-30 23:06:17.050678', 'step': 3448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.105530', 'step': 3448, 'epoch': 2} {'type': 'loss', 'content': 0.027117598801851273, 'timestamp': '2025-09-30 23:06:17.108441', 'step': 3449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:17.178257', 'step': 3449, 'epoch': 2} {'type': 'loss', 'content': 0.01168428361415863, 'timestamp': '2025-09-30 23:06:17.181028', 'step': 3450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.234560', 'step': 3450, 'epoch': 2} {'type': 'loss', 'content': 0.015925584360957146, 'timestamp': '2025-09-30 23:06:17.237032', 'step': 3451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.289891', 'step': 3451, 'epoch': 2} {'type': 'loss', 'content': 0.024195192381739616, 'timestamp': '2025-09-30 23:06:17.295659', 'step': 3452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:17.347400', 'step': 3452, 'epoch': 2} {'type': 'loss', 'content': 0.07577551901340485, 'timestamp': '2025-09-30 23:06:17.349729', 'step': 3453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.402556', 'step': 3453, 'epoch': 2} {'type': 'loss', 'content': 0.004895809572190046, 'timestamp': '2025-09-30 23:06:17.405287', 'step': 3454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.458404', 'step': 3454, 'epoch': 2} {'type': 'loss', 'content': 0.006849043071269989, 'timestamp': '2025-09-30 23:06:17.460619', 'step': 3455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:17.513871', 'step': 3455, 'epoch': 2} {'type': 'loss', 'content': 0.0058351196348667145, 'timestamp': '2025-09-30 23:06:17.519674', 'step': 3456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.571807', 'step': 3456, 'epoch': 2} {'type': 'loss', 'content': 0.0104494858533144, 'timestamp': '2025-09-30 23:06:17.574109', 'step': 3457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.627114', 'step': 3457, 'epoch': 2} {'type': 'loss', 'content': 0.026790613308548927, 'timestamp': '2025-09-30 23:06:17.629461', 'step': 3458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.683606', 'step': 3458, 'epoch': 2} {'type': 'loss', 'content': 0.025362646207213402, 'timestamp': '2025-09-30 23:06:17.686392', 'step': 3459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.738937', 'step': 3459, 'epoch': 2} {'type': 'loss', 'content': 0.014129286631941795, 'timestamp': '2025-09-30 23:06:17.744605', 'step': 3460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.797506', 'step': 3460, 'epoch': 2} {'type': 'loss', 'content': 0.02499503083527088, 'timestamp': '2025-09-30 23:06:17.800942', 'step': 3461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.856249', 'step': 3461, 'epoch': 2} {'type': 'loss', 'content': 0.009864871390163898, 'timestamp': '2025-09-30 23:06:17.859005', 'step': 3462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:17.912039', 'step': 3462, 'epoch': 2} {'type': 'loss', 'content': 0.017019014805555344, 'timestamp': '2025-09-30 23:06:17.914158', 'step': 3463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:17.967583', 'step': 3463, 'epoch': 2} {'type': 'loss', 'content': 0.01705314964056015, 'timestamp': '2025-09-30 23:06:17.973397', 'step': 3464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.025235', 'step': 3464, 'epoch': 2} {'type': 'loss', 'content': 0.026418764144182205, 'timestamp': '2025-09-30 23:06:18.027868', 'step': 3465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.079701', 'step': 3465, 'epoch': 2} {'type': 'loss', 'content': 0.0027622689958661795, 'timestamp': '2025-09-30 23:06:18.082123', 'step': 3466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.135808', 'step': 3466, 'epoch': 2} {'type': 'loss', 'content': 0.007582558784633875, 'timestamp': '2025-09-30 23:06:18.137974', 'step': 3467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.190544', 'step': 3467, 'epoch': 2} {'type': 'loss', 'content': 0.012932316400110722, 'timestamp': '2025-09-30 23:06:18.196425', 'step': 3468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:18.254710', 'step': 3468, 'epoch': 2} {'type': 'loss', 'content': 0.010139676742255688, 'timestamp': '2025-09-30 23:06:18.257003', 'step': 3469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.308901', 'step': 3469, 'epoch': 2} {'type': 'loss', 'content': 0.00767445657402277, 'timestamp': '2025-09-30 23:06:18.320943', 'step': 3470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.373542', 'step': 3470, 'epoch': 2} {'type': 'loss', 'content': 0.025954661890864372, 'timestamp': '2025-09-30 23:06:18.375844', 'step': 3471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.428260', 'step': 3471, 'epoch': 2} {'type': 'loss', 'content': 0.004959893878549337, 'timestamp': '2025-09-30 23:06:18.434368', 'step': 3472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.486573', 'step': 3472, 'epoch': 2} {'type': 'loss', 'content': 0.04389457032084465, 'timestamp': '2025-09-30 23:06:18.488792', 'step': 3473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:18.542210', 'step': 3473, 'epoch': 2} {'type': 'loss', 'content': 0.005944516509771347, 'timestamp': '2025-09-30 23:06:18.544364', 'step': 3474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.597879', 'step': 3474, 'epoch': 2} {'type': 'loss', 'content': 0.013934994116425514, 'timestamp': '2025-09-30 23:06:18.600193', 'step': 3475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.652574', 'step': 3475, 'epoch': 2} {'type': 'loss', 'content': 0.03054254688322544, 'timestamp': '2025-09-30 23:06:18.658161', 'step': 3476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.709762', 'step': 3476, 'epoch': 2} {'type': 'loss', 'content': 0.020148353651165962, 'timestamp': '2025-09-30 23:06:18.712195', 'step': 3477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:18.764109', 'step': 3477, 'epoch': 2} {'type': 'loss', 'content': 0.025513632223010063, 'timestamp': '2025-09-30 23:06:18.767044', 'step': 3478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.819758', 'step': 3478, 'epoch': 2} {'type': 'loss', 'content': 0.022969115525484085, 'timestamp': '2025-09-30 23:06:18.822293', 'step': 3479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.874354', 'step': 3479, 'epoch': 2} {'type': 'loss', 'content': 0.00871242955327034, 'timestamp': '2025-09-30 23:06:18.881036', 'step': 3480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.932498', 'step': 3480, 'epoch': 2} {'type': 'loss', 'content': 0.01300531905144453, 'timestamp': '2025-09-30 23:06:18.934880', 'step': 3481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:18.986735', 'step': 3481, 'epoch': 2} {'type': 'loss', 'content': 0.005250118672847748, 'timestamp': '2025-09-30 23:06:18.989187', 'step': 3482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.041420', 'step': 3482, 'epoch': 2} {'type': 'loss', 'content': 0.0069277905859053135, 'timestamp': '2025-09-30 23:06:19.043693', 'step': 3483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.095576', 'step': 3483, 'epoch': 2} {'type': 'loss', 'content': 0.02899288572371006, 'timestamp': '2025-09-30 23:06:19.102574', 'step': 3484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.154542', 'step': 3484, 'epoch': 2} {'type': 'loss', 'content': 0.009043692611157894, 'timestamp': '2025-09-30 23:06:19.156865', 'step': 3485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.208487', 'step': 3485, 'epoch': 2} {'type': 'loss', 'content': 0.00762169947847724, 'timestamp': '2025-09-30 23:06:19.210702', 'step': 3486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:19.263094', 'step': 3486, 'epoch': 2} {'type': 'loss', 'content': 0.023362943902611732, 'timestamp': '2025-09-30 23:06:19.265342', 'step': 3487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.320670', 'step': 3487, 'epoch': 2} {'type': 'loss', 'content': 0.005887587554752827, 'timestamp': '2025-09-30 23:06:19.326304', 'step': 3488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.378370', 'step': 3488, 'epoch': 2} {'type': 'loss', 'content': 0.013097809627652168, 'timestamp': '2025-09-30 23:06:19.380997', 'step': 3489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:19.434406', 'step': 3489, 'epoch': 2} {'type': 'loss', 'content': 0.025400521233677864, 'timestamp': '2025-09-30 23:06:19.436771', 'step': 3490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.489082', 'step': 3490, 'epoch': 2} {'type': 'loss', 'content': 0.03821522369980812, 'timestamp': '2025-09-30 23:06:19.491696', 'step': 3491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.543960', 'step': 3491, 'epoch': 2} {'type': 'loss', 'content': 0.00675336504355073, 'timestamp': '2025-09-30 23:06:19.549975', 'step': 3492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.600932', 'step': 3492, 'epoch': 2} {'type': 'loss', 'content': 0.0024073554668575525, 'timestamp': '2025-09-30 23:06:19.604058', 'step': 3493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.657269', 'step': 3493, 'epoch': 2} {'type': 'loss', 'content': 0.034113168716430664, 'timestamp': '2025-09-30 23:06:19.660171', 'step': 3494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.712144', 'step': 3494, 'epoch': 2} {'type': 'loss', 'content': 0.006272822618484497, 'timestamp': '2025-09-30 23:06:19.714364', 'step': 3495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:19.767989', 'step': 3495, 'epoch': 2} {'type': 'loss', 'content': 0.006109371315687895, 'timestamp': '2025-09-30 23:06:19.780077', 'step': 3496, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:06:23.282757', 'step': 3496, 'epoch': 2} {'type': 'pplx', 'content': 6466612.034447016, 'timestamp': '2025-09-30 23:06:23.285125', 'step': 3496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:23.334990', 'step': 3496, 'epoch': 2} {'type': 'loss', 'content': 0.014639596454799175, 'timestamp': '2025-09-30 23:06:23.337252', 'step': 3497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:23.390371', 'step': 3497, 'epoch': 2} {'type': 'loss', 'content': 0.018523629754781723, 'timestamp': '2025-09-30 23:06:23.392684', 'step': 3498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:23.445796', 'step': 3498, 'epoch': 2} {'type': 'loss', 'content': 0.0563783273100853, 'timestamp': '2025-09-30 23:06:23.448092', 'step': 3499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:23.503006', 'step': 3499, 'epoch': 2} {'type': 'loss', 'content': 0.010710524395108223, 'timestamp': '2025-09-30 23:06:23.508908', 'step': 3500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-30 23:06:24.094904', 'step': 3500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.148866', 'step': 3500, 'epoch': 2} {'type': 'loss', 'content': 0.019887251779437065, 'timestamp': '2025-09-30 23:06:24.151304', 'step': 3501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.204302', 'step': 3501, 'epoch': 2} {'type': 'loss', 'content': 0.010814785026013851, 'timestamp': '2025-09-30 23:06:24.207040', 'step': 3502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.259998', 'step': 3502, 'epoch': 2} {'type': 'loss', 'content': 0.0014052697224542499, 'timestamp': '2025-09-30 23:06:24.264504', 'step': 3503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.318650', 'step': 3503, 'epoch': 2} {'type': 'loss', 'content': 0.003028411418199539, 'timestamp': '2025-09-30 23:06:24.325063', 'step': 3504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.379215', 'step': 3504, 'epoch': 2} {'type': 'loss', 'content': 0.0001973855687538162, 'timestamp': '2025-09-30 23:06:24.383784', 'step': 3505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.449358', 'step': 3505, 'epoch': 2} {'type': 'loss', 'content': 0.0037980698980391026, 'timestamp': '2025-09-30 23:06:24.452621', 'step': 3506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:24.509377', 'step': 3506, 'epoch': 2} {'type': 'loss', 'content': 0.002754520159214735, 'timestamp': '2025-09-30 23:06:24.512462', 'step': 3507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.568115', 'step': 3507, 'epoch': 2} {'type': 'loss', 'content': 0.0023064452689141035, 'timestamp': '2025-09-30 23:06:24.576402', 'step': 3508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.634584', 'step': 3508, 'epoch': 2} {'type': 'loss', 'content': 0.008689417503774166, 'timestamp': '2025-09-30 23:06:24.638002', 'step': 3509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.691092', 'step': 3509, 'epoch': 2} {'type': 'loss', 'content': 0.008178197778761387, 'timestamp': '2025-09-30 23:06:24.693585', 'step': 3510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.748912', 'step': 3510, 'epoch': 2} {'type': 'loss', 'content': 0.04939861223101616, 'timestamp': '2025-09-30 23:06:24.751365', 'step': 3511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.807110', 'step': 3511, 'epoch': 2} {'type': 'loss', 'content': 0.09871534258127213, 'timestamp': '2025-09-30 23:06:24.821678', 'step': 3512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.878138', 'step': 3512, 'epoch': 2} {'type': 'loss', 'content': 0.0032344236969947815, 'timestamp': '2025-09-30 23:06:24.887848', 'step': 3513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:24.942140', 'step': 3513, 'epoch': 2} {'type': 'loss', 'content': 0.01073502842336893, 'timestamp': '2025-09-30 23:06:24.944884', 'step': 3514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:25.000113', 'step': 3514, 'epoch': 2} {'type': 'loss', 'content': 0.003676957217976451, 'timestamp': '2025-09-30 23:06:25.003774', 'step': 3515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.059065', 'step': 3515, 'epoch': 2} {'type': 'loss', 'content': 0.034774355590343475, 'timestamp': '2025-09-30 23:06:25.065604', 'step': 3516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.119881', 'step': 3516, 'epoch': 2} {'type': 'loss', 'content': 0.0022716238163411617, 'timestamp': '2025-09-30 23:06:25.125888', 'step': 3517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.189885', 'step': 3517, 'epoch': 2} {'type': 'loss', 'content': 0.007635748945176601, 'timestamp': '2025-09-30 23:06:25.192931', 'step': 3518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.249636', 'step': 3518, 'epoch': 2} {'type': 'loss', 'content': 0.00044649295159615576, 'timestamp': '2025-09-30 23:06:25.255062', 'step': 3519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.311997', 'step': 3519, 'epoch': 2} {'type': 'loss', 'content': 0.007586721330881119, 'timestamp': '2025-09-30 23:06:25.318965', 'step': 3520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.376003', 'step': 3520, 'epoch': 2} {'type': 'loss', 'content': 0.026957018300890923, 'timestamp': '2025-09-30 23:06:25.381540', 'step': 3521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:25.443264', 'step': 3521, 'epoch': 2} {'type': 'loss', 'content': 0.00733589893206954, 'timestamp': '2025-09-30 23:06:25.445736', 'step': 3522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.510578', 'step': 3522, 'epoch': 2} {'type': 'loss', 'content': 0.0037460827734321356, 'timestamp': '2025-09-30 23:06:25.513650', 'step': 3523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.568814', 'step': 3523, 'epoch': 2} {'type': 'loss', 'content': 0.01593150570988655, 'timestamp': '2025-09-30 23:06:25.575277', 'step': 3524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.631611', 'step': 3524, 'epoch': 2} {'type': 'loss', 'content': 0.0012413630029186606, 'timestamp': '2025-09-30 23:06:25.634611', 'step': 3525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.690562', 'step': 3525, 'epoch': 2} {'type': 'loss', 'content': 0.024408670142292976, 'timestamp': '2025-09-30 23:06:25.694350', 'step': 3526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:25.752027', 'step': 3526, 'epoch': 2} {'type': 'loss', 'content': 0.0004685515887103975, 'timestamp': '2025-09-30 23:06:25.755352', 'step': 3527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.811708', 'step': 3527, 'epoch': 2} {'type': 'loss', 'content': 0.017270194366574287, 'timestamp': '2025-09-30 23:06:25.819945', 'step': 3528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.875829', 'step': 3528, 'epoch': 2} {'type': 'loss', 'content': 0.028527621179819107, 'timestamp': '2025-09-30 23:06:25.879470', 'step': 3529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.936262', 'step': 3529, 'epoch': 2} {'type': 'loss', 'content': 0.027613872662186623, 'timestamp': '2025-09-30 23:06:25.939726', 'step': 3530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:25.998495', 'step': 3530, 'epoch': 2} {'type': 'loss', 'content': 0.0722653865814209, 'timestamp': '2025-09-30 23:06:26.002037', 'step': 3531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.059251', 'step': 3531, 'epoch': 2} {'type': 'loss', 'content': 0.01960560865700245, 'timestamp': '2025-09-30 23:06:26.065575', 'step': 3532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.120149', 'step': 3532, 'epoch': 2} {'type': 'loss', 'content': 0.010985327884554863, 'timestamp': '2025-09-30 23:06:26.124558', 'step': 3533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:26.180681', 'step': 3533, 'epoch': 2} {'type': 'loss', 'content': 0.0145855862647295, 'timestamp': '2025-09-30 23:06:26.183905', 'step': 3534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.240356', 'step': 3534, 'epoch': 2} {'type': 'loss', 'content': 0.038247279822826385, 'timestamp': '2025-09-30 23:06:26.244264', 'step': 3535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.300506', 'step': 3535, 'epoch': 2} {'type': 'loss', 'content': 0.013217455707490444, 'timestamp': '2025-09-30 23:06:26.307525', 'step': 3536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.363088', 'step': 3536, 'epoch': 2} {'type': 'loss', 'content': 0.04379204660654068, 'timestamp': '2025-09-30 23:06:26.369734', 'step': 3537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.433091', 'step': 3537, 'epoch': 2} {'type': 'loss', 'content': 0.014396597631275654, 'timestamp': '2025-09-30 23:06:26.439229', 'step': 3538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.494177', 'step': 3538, 'epoch': 2} {'type': 'loss', 'content': 0.022583315148949623, 'timestamp': '2025-09-30 23:06:26.498696', 'step': 3539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:26.553922', 'step': 3539, 'epoch': 2} {'type': 'loss', 'content': 0.015995092689990997, 'timestamp': '2025-09-30 23:06:26.560656', 'step': 3540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:26.615750', 'step': 3540, 'epoch': 2} {'type': 'loss', 'content': 0.012678117491304874, 'timestamp': '2025-09-30 23:06:26.618755', 'step': 3541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:26.674904', 'step': 3541, 'epoch': 2} {'type': 'loss', 'content': 0.04315447434782982, 'timestamp': '2025-09-30 23:06:26.679419', 'step': 3542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.737013', 'step': 3542, 'epoch': 2} {'type': 'loss', 'content': 0.03808184340596199, 'timestamp': '2025-09-30 23:06:26.742920', 'step': 3543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:26.800619', 'step': 3543, 'epoch': 2} {'type': 'loss', 'content': 0.0010877930326387286, 'timestamp': '2025-09-30 23:06:26.807720', 'step': 3544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.864002', 'step': 3544, 'epoch': 2} {'type': 'loss', 'content': 0.0029124042484909296, 'timestamp': '2025-09-30 23:06:26.868196', 'step': 3545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.924745', 'step': 3545, 'epoch': 2} {'type': 'loss', 'content': 0.03600179776549339, 'timestamp': '2025-09-30 23:06:26.928208', 'step': 3546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:26.984671', 'step': 3546, 'epoch': 2} {'type': 'loss', 'content': 0.030898451805114746, 'timestamp': '2025-09-30 23:06:26.990748', 'step': 3547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.045571', 'step': 3547, 'epoch': 2} {'type': 'loss', 'content': 0.022288445383310318, 'timestamp': '2025-09-30 23:06:27.054076', 'step': 3548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.110081', 'step': 3548, 'epoch': 2} {'type': 'loss', 'content': 0.0010294837411493063, 'timestamp': '2025-09-30 23:06:27.113832', 'step': 3549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.171037', 'step': 3549, 'epoch': 2} {'type': 'loss', 'content': 0.04140794649720192, 'timestamp': '2025-09-30 23:06:27.174482', 'step': 3550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.238102', 'step': 3550, 'epoch': 2} {'type': 'loss', 'content': 0.013553565368056297, 'timestamp': '2025-09-30 23:06:27.241529', 'step': 3551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.297020', 'step': 3551, 'epoch': 2} {'type': 'loss', 'content': 0.005503360647708178, 'timestamp': '2025-09-30 23:06:27.304511', 'step': 3552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.362114', 'step': 3552, 'epoch': 2} {'type': 'loss', 'content': 0.00764206750318408, 'timestamp': '2025-09-30 23:06:27.365008', 'step': 3553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.430181', 'step': 3553, 'epoch': 2} {'type': 'loss', 'content': 0.0030871666967868805, 'timestamp': '2025-09-30 23:06:27.432981', 'step': 3554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:27.493118', 'step': 3554, 'epoch': 2} {'type': 'loss', 'content': 0.02638370916247368, 'timestamp': '2025-09-30 23:06:27.496493', 'step': 3555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:27.553880', 'step': 3555, 'epoch': 2} {'type': 'loss', 'content': 0.03153368458151817, 'timestamp': '2025-09-30 23:06:27.560774', 'step': 3556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:27.615042', 'step': 3556, 'epoch': 2} {'type': 'loss', 'content': 0.01355680264532566, 'timestamp': '2025-09-30 23:06:27.617823', 'step': 3557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.676001', 'step': 3557, 'epoch': 2} {'type': 'loss', 'content': 0.015167312696576118, 'timestamp': '2025-09-30 23:06:27.681217', 'step': 3558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.743108', 'step': 3558, 'epoch': 2} {'type': 'loss', 'content': 0.010744026862084866, 'timestamp': '2025-09-30 23:06:27.747061', 'step': 3559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.803617', 'step': 3559, 'epoch': 2} {'type': 'loss', 'content': 0.0076886555179953575, 'timestamp': '2025-09-30 23:06:27.810777', 'step': 3560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:27.868333', 'step': 3560, 'epoch': 2} {'type': 'loss', 'content': 0.03365146368741989, 'timestamp': '2025-09-30 23:06:27.871641', 'step': 3561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.931911', 'step': 3561, 'epoch': 2} {'type': 'loss', 'content': 0.010277901776134968, 'timestamp': '2025-09-30 23:06:27.935440', 'step': 3562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:27.991752', 'step': 3562, 'epoch': 2} {'type': 'loss', 'content': 0.00134531210642308, 'timestamp': '2025-09-30 23:06:27.995698', 'step': 3563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.055178', 'step': 3563, 'epoch': 2} {'type': 'loss', 'content': 0.020437004044651985, 'timestamp': '2025-09-30 23:06:28.063039', 'step': 3564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.120124', 'step': 3564, 'epoch': 2} {'type': 'loss', 'content': 0.015443087555468082, 'timestamp': '2025-09-30 23:06:28.123883', 'step': 3565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:28.180042', 'step': 3565, 'epoch': 2} {'type': 'loss', 'content': 0.01101844571530819, 'timestamp': '2025-09-30 23:06:28.184235', 'step': 3566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.240502', 'step': 3566, 'epoch': 2} {'type': 'loss', 'content': 0.022592827677726746, 'timestamp': '2025-09-30 23:06:28.245563', 'step': 3567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.303623', 'step': 3567, 'epoch': 2} {'type': 'loss', 'content': 0.014244282618165016, 'timestamp': '2025-09-30 23:06:28.319695', 'step': 3568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.375818', 'step': 3568, 'epoch': 2} {'type': 'loss', 'content': 0.004783723969012499, 'timestamp': '2025-09-30 23:06:28.380730', 'step': 3569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.439910', 'step': 3569, 'epoch': 2} {'type': 'loss', 'content': 0.010718471370637417, 'timestamp': '2025-09-30 23:06:28.445770', 'step': 3570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.505337', 'step': 3570, 'epoch': 2} {'type': 'loss', 'content': 0.015697749331593513, 'timestamp': '2025-09-30 23:06:28.509037', 'step': 3571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.575985', 'step': 3571, 'epoch': 2} {'type': 'loss', 'content': 0.0181056410074234, 'timestamp': '2025-09-30 23:06:28.582810', 'step': 3572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:28.639490', 'step': 3572, 'epoch': 2} {'type': 'loss', 'content': 0.006284915376454592, 'timestamp': '2025-09-30 23:06:28.642824', 'step': 3573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.700109', 'step': 3573, 'epoch': 2} {'type': 'loss', 'content': 0.016568128019571304, 'timestamp': '2025-09-30 23:06:28.704698', 'step': 3574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.762224', 'step': 3574, 'epoch': 2} {'type': 'loss', 'content': 0.01289291586726904, 'timestamp': '2025-09-30 23:06:28.766232', 'step': 3575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.824677', 'step': 3575, 'epoch': 2} {'type': 'loss', 'content': 0.011458429507911205, 'timestamp': '2025-09-30 23:06:28.833082', 'step': 3576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.889936', 'step': 3576, 'epoch': 2} {'type': 'loss', 'content': 0.0027532961685210466, 'timestamp': '2025-09-30 23:06:28.893320', 'step': 3577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:28.950016', 'step': 3577, 'epoch': 2} {'type': 'loss', 'content': 0.021998891606926918, 'timestamp': '2025-09-30 23:06:28.955548', 'step': 3578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.038546', 'step': 3578, 'epoch': 2} {'type': 'loss', 'content': 0.009463933296501637, 'timestamp': '2025-09-30 23:06:29.044104', 'step': 3579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.110127', 'step': 3579, 'epoch': 2} {'type': 'loss', 'content': 0.005092587787657976, 'timestamp': '2025-09-30 23:06:29.118244', 'step': 3580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.187168', 'step': 3580, 'epoch': 2} {'type': 'loss', 'content': 0.0035547346342355013, 'timestamp': '2025-09-30 23:06:29.190041', 'step': 3581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.250652', 'step': 3581, 'epoch': 2} {'type': 'loss', 'content': 0.02434438467025757, 'timestamp': '2025-09-30 23:06:29.258758', 'step': 3582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.326861', 'step': 3582, 'epoch': 2} {'type': 'loss', 'content': 0.012047291733324528, 'timestamp': '2025-09-30 23:06:29.330071', 'step': 3583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.388756', 'step': 3583, 'epoch': 2} {'type': 'loss', 'content': 0.035410862416028976, 'timestamp': '2025-09-30 23:06:29.395391', 'step': 3584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.455309', 'step': 3584, 'epoch': 2} {'type': 'loss', 'content': 0.0033878255635499954, 'timestamp': '2025-09-30 23:06:29.461920', 'step': 3585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.522411', 'step': 3585, 'epoch': 2} {'type': 'loss', 'content': 0.0012070528464391828, 'timestamp': '2025-09-30 23:06:29.532182', 'step': 3586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.602601', 'step': 3586, 'epoch': 2} {'type': 'loss', 'content': 0.011343760415911674, 'timestamp': '2025-09-30 23:06:29.609929', 'step': 3587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.678558', 'step': 3587, 'epoch': 2} {'type': 'loss', 'content': 0.024823160842061043, 'timestamp': '2025-09-30 23:06:29.689225', 'step': 3588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.757588', 'step': 3588, 'epoch': 2} {'type': 'loss', 'content': 0.03796323761343956, 'timestamp': '2025-09-30 23:06:29.765820', 'step': 3589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.836059', 'step': 3589, 'epoch': 2} {'type': 'loss', 'content': 0.024615218862891197, 'timestamp': '2025-09-30 23:06:29.844413', 'step': 3590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.906669', 'step': 3590, 'epoch': 2} {'type': 'loss', 'content': 0.03321540728211403, 'timestamp': '2025-09-30 23:06:29.910408', 'step': 3591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:29.984519', 'step': 3591, 'epoch': 2} {'type': 'loss', 'content': 0.01201066467911005, 'timestamp': '2025-09-30 23:06:29.991899', 'step': 3592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.050486', 'step': 3592, 'epoch': 2} {'type': 'loss', 'content': 0.024226412177085876, 'timestamp': '2025-09-30 23:06:30.060776', 'step': 3593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.126189', 'step': 3593, 'epoch': 2} {'type': 'loss', 'content': 0.03048400953412056, 'timestamp': '2025-09-30 23:06:30.133955', 'step': 3594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:30.198573', 'step': 3594, 'epoch': 2} {'type': 'loss', 'content': 0.003573102643713355, 'timestamp': '2025-09-30 23:06:30.206868', 'step': 3595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:30.277625', 'step': 3595, 'epoch': 2} {'type': 'loss', 'content': 0.014920267276465893, 'timestamp': '2025-09-30 23:06:30.286980', 'step': 3596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.351899', 'step': 3596, 'epoch': 2} {'type': 'loss', 'content': 0.06640743464231491, 'timestamp': '2025-09-30 23:06:30.355550', 'step': 3597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.419607', 'step': 3597, 'epoch': 2} {'type': 'loss', 'content': 0.004590391181409359, 'timestamp': '2025-09-30 23:06:30.427973', 'step': 3598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.502567', 'step': 3598, 'epoch': 2} {'type': 'loss', 'content': 0.0056806462816894054, 'timestamp': '2025-09-30 23:06:30.505441', 'step': 3599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.568750', 'step': 3599, 'epoch': 2} {'type': 'loss', 'content': 0.003221316495910287, 'timestamp': '2025-09-30 23:06:30.578716', 'step': 3600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:30.641689', 'step': 3600, 'epoch': 2} {'type': 'loss', 'content': 0.0017014023615047336, 'timestamp': '2025-09-30 23:06:30.655771', 'step': 3601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.723004', 'step': 3601, 'epoch': 2} {'type': 'loss', 'content': 0.014282437972724438, 'timestamp': '2025-09-30 23:06:30.728860', 'step': 3602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:30.798274', 'step': 3602, 'epoch': 2} {'type': 'loss', 'content': 0.005231867544353008, 'timestamp': '2025-09-30 23:06:30.802765', 'step': 3603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:30.871682', 'step': 3603, 'epoch': 2} {'type': 'loss', 'content': 0.018528280779719353, 'timestamp': '2025-09-30 23:06:30.883988', 'step': 3604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:30.949977', 'step': 3604, 'epoch': 2} {'type': 'loss', 'content': 0.03686800226569176, 'timestamp': '2025-09-30 23:06:30.957878', 'step': 3605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.032480', 'step': 3605, 'epoch': 2} {'type': 'loss', 'content': 0.0016684355214238167, 'timestamp': '2025-09-30 23:06:31.043501', 'step': 3606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.121948', 'step': 3606, 'epoch': 2} {'type': 'loss', 'content': 0.008187459781765938, 'timestamp': '2025-09-30 23:06:31.132290', 'step': 3607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.190691', 'step': 3607, 'epoch': 2} {'type': 'loss', 'content': 0.03177248314023018, 'timestamp': '2025-09-30 23:06:31.198569', 'step': 3608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.270151', 'step': 3608, 'epoch': 2} {'type': 'loss', 'content': 0.011961095966398716, 'timestamp': '2025-09-30 23:06:31.273180', 'step': 3609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.340326', 'step': 3609, 'epoch': 2} {'type': 'loss', 'content': 0.004345990251749754, 'timestamp': '2025-09-30 23:06:31.351389', 'step': 3610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.416419', 'step': 3610, 'epoch': 2} {'type': 'loss', 'content': 0.003927549812942743, 'timestamp': '2025-09-30 23:06:31.427794', 'step': 3611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.498995', 'step': 3611, 'epoch': 2} {'type': 'loss', 'content': 0.005982740316540003, 'timestamp': '2025-09-30 23:06:31.505941', 'step': 3612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.565019', 'step': 3612, 'epoch': 2} {'type': 'loss', 'content': 0.03087427094578743, 'timestamp': '2025-09-30 23:06:31.567247', 'step': 3613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:31.624686', 'step': 3613, 'epoch': 2} {'type': 'loss', 'content': 0.010352035984396935, 'timestamp': '2025-09-30 23:06:31.627851', 'step': 3614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:06:31.687187', 'step': 3614, 'epoch': 2} {'type': 'loss', 'content': 0.03866512328386307, 'timestamp': '2025-09-30 23:06:31.691039', 'step': 3615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.756158', 'step': 3615, 'epoch': 2} {'type': 'loss', 'content': 0.0284222811460495, 'timestamp': '2025-09-30 23:06:31.765314', 'step': 3616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.830039', 'step': 3616, 'epoch': 2} {'type': 'loss', 'content': 0.007665965706110001, 'timestamp': '2025-09-30 23:06:31.838662', 'step': 3617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.902367', 'step': 3617, 'epoch': 2} {'type': 'loss', 'content': 0.019629430025815964, 'timestamp': '2025-09-30 23:06:31.905099', 'step': 3618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:31.973370', 'step': 3618, 'epoch': 2} {'type': 'loss', 'content': 0.004057222045958042, 'timestamp': '2025-09-30 23:06:31.981259', 'step': 3619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.042378', 'step': 3619, 'epoch': 2} {'type': 'loss', 'content': 0.07298438996076584, 'timestamp': '2025-09-30 23:06:32.052793', 'step': 3620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.116800', 'step': 3620, 'epoch': 2} {'type': 'loss', 'content': 0.08330465853214264, 'timestamp': '2025-09-30 23:06:32.123248', 'step': 3621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.185643', 'step': 3621, 'epoch': 2} {'type': 'loss', 'content': 0.010923488065600395, 'timestamp': '2025-09-30 23:06:32.191225', 'step': 3622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.258316', 'step': 3622, 'epoch': 2} {'type': 'loss', 'content': 0.010550771839916706, 'timestamp': '2025-09-30 23:06:32.263446', 'step': 3623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.332860', 'step': 3623, 'epoch': 2} {'type': 'loss', 'content': 0.00373009592294693, 'timestamp': '2025-09-30 23:06:32.342479', 'step': 3624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.399938', 'step': 3624, 'epoch': 2} {'type': 'loss', 'content': 0.010864108800888062, 'timestamp': '2025-09-30 23:06:32.405248', 'step': 3625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.467524', 'step': 3625, 'epoch': 2} {'type': 'loss', 'content': 0.00040927171357907355, 'timestamp': '2025-09-30 23:06:32.470605', 'step': 3626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.536678', 'step': 3626, 'epoch': 2} {'type': 'loss', 'content': 0.06054940074682236, 'timestamp': '2025-09-30 23:06:32.546008', 'step': 3627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.613740', 'step': 3627, 'epoch': 2} {'type': 'loss', 'content': 0.012395032681524754, 'timestamp': '2025-09-30 23:06:32.623057', 'step': 3628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.687675', 'step': 3628, 'epoch': 2} {'type': 'loss', 'content': 0.009266926907002926, 'timestamp': '2025-09-30 23:06:32.694227', 'step': 3629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:32.754662', 'step': 3629, 'epoch': 2} {'type': 'loss', 'content': 0.023078257218003273, 'timestamp': '2025-09-30 23:06:32.761553', 'step': 3630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:32.828130', 'step': 3630, 'epoch': 2} {'type': 'loss', 'content': 0.016073303297162056, 'timestamp': '2025-09-30 23:06:32.833565', 'step': 3631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.889986', 'step': 3631, 'epoch': 2} {'type': 'loss', 'content': 0.043678633868694305, 'timestamp': '2025-09-30 23:06:32.895957', 'step': 3632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:32.953176', 'step': 3632, 'epoch': 2} {'type': 'loss', 'content': 0.03460721671581268, 'timestamp': '2025-09-30 23:06:32.955629', 'step': 3633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:33.012689', 'step': 3633, 'epoch': 2} {'type': 'loss', 'content': 0.006734173279255629, 'timestamp': '2025-09-30 23:06:33.017304', 'step': 3634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.075891', 'step': 3634, 'epoch': 2} {'type': 'loss', 'content': 0.01312424149364233, 'timestamp': '2025-09-30 23:06:33.082664', 'step': 3635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.140697', 'step': 3635, 'epoch': 2} {'type': 'loss', 'content': 0.015850789844989777, 'timestamp': '2025-09-30 23:06:33.147672', 'step': 3636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.211613', 'step': 3636, 'epoch': 2} {'type': 'loss', 'content': 0.07885492593050003, 'timestamp': '2025-09-30 23:06:33.215666', 'step': 3637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.269656', 'step': 3637, 'epoch': 2} {'type': 'loss', 'content': 0.012753091752529144, 'timestamp': '2025-09-30 23:06:33.275075', 'step': 3638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.331623', 'step': 3638, 'epoch': 2} {'type': 'loss', 'content': 0.007095198146998882, 'timestamp': '2025-09-30 23:06:33.334236', 'step': 3639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.397247', 'step': 3639, 'epoch': 2} {'type': 'loss', 'content': 0.004936773795634508, 'timestamp': '2025-09-30 23:06:33.408189', 'step': 3640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.463571', 'step': 3640, 'epoch': 2} {'type': 'loss', 'content': 0.025077050551772118, 'timestamp': '2025-09-30 23:06:33.478107', 'step': 3641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.538094', 'step': 3641, 'epoch': 2} {'type': 'loss', 'content': 0.021482111886143684, 'timestamp': '2025-09-30 23:06:33.541218', 'step': 3642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:33.601615', 'step': 3642, 'epoch': 2} {'type': 'loss', 'content': 0.0007270852802321315, 'timestamp': '2025-09-30 23:06:33.604936', 'step': 3643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.669079', 'step': 3643, 'epoch': 2} {'type': 'loss', 'content': 0.018462786450982094, 'timestamp': '2025-09-30 23:06:33.676776', 'step': 3644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.735191', 'step': 3644, 'epoch': 2} {'type': 'loss', 'content': 0.006875508464872837, 'timestamp': '2025-09-30 23:06:33.738823', 'step': 3645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.795343', 'step': 3645, 'epoch': 2} {'type': 'loss', 'content': 0.023268207907676697, 'timestamp': '2025-09-30 23:06:33.799890', 'step': 3646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.860356', 'step': 3646, 'epoch': 2} {'type': 'loss', 'content': 0.015046167187392712, 'timestamp': '2025-09-30 23:06:33.868899', 'step': 3647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:33.925276', 'step': 3647, 'epoch': 2} {'type': 'loss', 'content': 0.0029485945124179125, 'timestamp': '2025-09-30 23:06:33.932400', 'step': 3648, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:06:37.841506', 'step': 3648, 'epoch': 2} {'type': 'pplx', 'content': 6798665.114729515, 'timestamp': '2025-09-30 23:06:37.846377', 'step': 3648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:37.899737', 'step': 3648, 'epoch': 2} {'type': 'loss', 'content': 0.02557421661913395, 'timestamp': '2025-09-30 23:06:37.903078', 'step': 3649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:37.958971', 'step': 3649, 'epoch': 2} {'type': 'loss', 'content': 0.013189044781029224, 'timestamp': '2025-09-30 23:06:37.964553', 'step': 3650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.020426', 'step': 3650, 'epoch': 2} {'type': 'loss', 'content': 0.011585342697799206, 'timestamp': '2025-09-30 23:06:38.023059', 'step': 3651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.079422', 'step': 3651, 'epoch': 2} {'type': 'loss', 'content': 0.01837574504315853, 'timestamp': '2025-09-30 23:06:38.086197', 'step': 3652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.143280', 'step': 3652, 'epoch': 2} {'type': 'loss', 'content': 0.008133253082633018, 'timestamp': '2025-09-30 23:06:38.146713', 'step': 3653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.212379', 'step': 3653, 'epoch': 2} {'type': 'loss', 'content': 0.040716201066970825, 'timestamp': '2025-09-30 23:06:38.215647', 'step': 3654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:38.271539', 'step': 3654, 'epoch': 2} {'type': 'loss', 'content': 0.014276211149990559, 'timestamp': '2025-09-30 23:06:38.275203', 'step': 3655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.334221', 'step': 3655, 'epoch': 2} {'type': 'loss', 'content': 0.008316121064126492, 'timestamp': '2025-09-30 23:06:38.339897', 'step': 3656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.396010', 'step': 3656, 'epoch': 2} {'type': 'loss', 'content': 0.01796230860054493, 'timestamp': '2025-09-30 23:06:38.399655', 'step': 3657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.455314', 'step': 3657, 'epoch': 2} {'type': 'loss', 'content': 0.017793985083699226, 'timestamp': '2025-09-30 23:06:38.458028', 'step': 3658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.515877', 'step': 3658, 'epoch': 2} {'type': 'loss', 'content': 0.00788914505392313, 'timestamp': '2025-09-30 23:06:38.520766', 'step': 3659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:38.581068', 'step': 3659, 'epoch': 2} {'type': 'loss', 'content': 0.004823833703994751, 'timestamp': '2025-09-30 23:06:38.587362', 'step': 3660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.642472', 'step': 3660, 'epoch': 2} {'type': 'loss', 'content': 0.010414980351924896, 'timestamp': '2025-09-30 23:06:38.652760', 'step': 3661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.720687', 'step': 3661, 'epoch': 2} {'type': 'loss', 'content': 0.02202628366649151, 'timestamp': '2025-09-30 23:06:38.724234', 'step': 3662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.780641', 'step': 3662, 'epoch': 2} {'type': 'loss', 'content': 0.029059795662760735, 'timestamp': '2025-09-30 23:06:38.783018', 'step': 3663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.845654', 'step': 3663, 'epoch': 2} {'type': 'loss', 'content': 0.017916211858391762, 'timestamp': '2025-09-30 23:06:38.857807', 'step': 3664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.914294', 'step': 3664, 'epoch': 2} {'type': 'loss', 'content': 0.0176063384860754, 'timestamp': '2025-09-30 23:06:38.921342', 'step': 3665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:38.983592', 'step': 3665, 'epoch': 2} {'type': 'loss', 'content': 0.01234513521194458, 'timestamp': '2025-09-30 23:06:38.987088', 'step': 3666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.060511', 'step': 3666, 'epoch': 2} {'type': 'loss', 'content': 0.042979247868061066, 'timestamp': '2025-09-30 23:06:39.064450', 'step': 3667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.126061', 'step': 3667, 'epoch': 2} {'type': 'loss', 'content': 0.03262157365679741, 'timestamp': '2025-09-30 23:06:39.133785', 'step': 3668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:39.192001', 'step': 3668, 'epoch': 2} {'type': 'loss', 'content': 0.04609664902091026, 'timestamp': '2025-09-30 23:06:39.196846', 'step': 3669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.252231', 'step': 3669, 'epoch': 2} {'type': 'loss', 'content': 0.0015626924578100443, 'timestamp': '2025-09-30 23:06:39.257860', 'step': 3670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.318973', 'step': 3670, 'epoch': 2} {'type': 'loss', 'content': 0.0010208532912656665, 'timestamp': '2025-09-30 23:06:39.323812', 'step': 3671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.381652', 'step': 3671, 'epoch': 2} {'type': 'loss', 'content': 0.002119294600561261, 'timestamp': '2025-09-30 23:06:39.389349', 'step': 3672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:39.444830', 'step': 3672, 'epoch': 2} {'type': 'loss', 'content': 0.02170015312731266, 'timestamp': '2025-09-30 23:06:39.448801', 'step': 3673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.508332', 'step': 3673, 'epoch': 2} {'type': 'loss', 'content': 0.018235506489872932, 'timestamp': '2025-09-30 23:06:39.512274', 'step': 3674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:39.568660', 'step': 3674, 'epoch': 2} {'type': 'loss', 'content': 0.012484592385590076, 'timestamp': '2025-09-30 23:06:39.572035', 'step': 3675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.628908', 'step': 3675, 'epoch': 2} {'type': 'loss', 'content': 0.020781442523002625, 'timestamp': '2025-09-30 23:06:39.636139', 'step': 3676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.694271', 'step': 3676, 'epoch': 2} {'type': 'loss', 'content': 0.04267888143658638, 'timestamp': '2025-09-30 23:06:39.698983', 'step': 3677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.756607', 'step': 3677, 'epoch': 2} {'type': 'loss', 'content': 0.058268677443265915, 'timestamp': '2025-09-30 23:06:39.762293', 'step': 3678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.820045', 'step': 3678, 'epoch': 2} {'type': 'loss', 'content': 0.04238419979810715, 'timestamp': '2025-09-30 23:06:39.824279', 'step': 3679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.880232', 'step': 3679, 'epoch': 2} {'type': 'loss', 'content': 0.010029071941971779, 'timestamp': '2025-09-30 23:06:39.886890', 'step': 3680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:39.941831', 'step': 3680, 'epoch': 2} {'type': 'loss', 'content': 0.05123711749911308, 'timestamp': '2025-09-30 23:06:39.945138', 'step': 3681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.001984', 'step': 3681, 'epoch': 2} {'type': 'loss', 'content': 0.04520295932888985, 'timestamp': '2025-09-30 23:06:40.005322', 'step': 3682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:06:40.059043', 'step': 3682, 'epoch': 2} {'type': 'loss', 'content': 0.012900966219604015, 'timestamp': '2025-09-30 23:06:40.062294', 'step': 3683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:40.122945', 'step': 3683, 'epoch': 2} {'type': 'loss', 'content': 0.0054120211862027645, 'timestamp': '2025-09-30 23:06:40.138077', 'step': 3684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:40.210239', 'step': 3684, 'epoch': 2} {'type': 'loss', 'content': 0.042115747928619385, 'timestamp': '2025-09-30 23:06:40.213552', 'step': 3685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.269754', 'step': 3685, 'epoch': 2} {'type': 'loss', 'content': 0.015454180538654327, 'timestamp': '2025-09-30 23:06:40.272330', 'step': 3686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.331745', 'step': 3686, 'epoch': 2} {'type': 'loss', 'content': 0.016007984057068825, 'timestamp': '2025-09-30 23:06:40.336274', 'step': 3687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.391532', 'step': 3687, 'epoch': 2} {'type': 'loss', 'content': 0.03628537803888321, 'timestamp': '2025-09-30 23:06:40.399257', 'step': 3688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.453259', 'step': 3688, 'epoch': 2} {'type': 'loss', 'content': 0.013583557680249214, 'timestamp': '2025-09-30 23:06:40.456340', 'step': 3689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.513998', 'step': 3689, 'epoch': 2} {'type': 'loss', 'content': 0.03921859338879585, 'timestamp': '2025-09-30 23:06:40.516985', 'step': 3690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.575780', 'step': 3690, 'epoch': 2} {'type': 'loss', 'content': 0.016558939591050148, 'timestamp': '2025-09-30 23:06:40.579033', 'step': 3691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.634135', 'step': 3691, 'epoch': 2} {'type': 'loss', 'content': 0.04051077365875244, 'timestamp': '2025-09-30 23:06:40.641176', 'step': 3692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.695485', 'step': 3692, 'epoch': 2} {'type': 'loss', 'content': 0.04661056399345398, 'timestamp': '2025-09-30 23:06:40.701128', 'step': 3693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.757356', 'step': 3693, 'epoch': 2} {'type': 'loss', 'content': 0.026479240506887436, 'timestamp': '2025-09-30 23:06:40.760863', 'step': 3694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.815762', 'step': 3694, 'epoch': 2} {'type': 'loss', 'content': 0.016673751175403595, 'timestamp': '2025-09-30 23:06:40.818916', 'step': 3695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.876674', 'step': 3695, 'epoch': 2} {'type': 'loss', 'content': 0.02429375983774662, 'timestamp': '2025-09-30 23:06:40.883027', 'step': 3696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:40.937728', 'step': 3696, 'epoch': 2} {'type': 'loss', 'content': 0.012361456640064716, 'timestamp': '2025-09-30 23:06:40.940984', 'step': 3697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:40.996830', 'step': 3697, 'epoch': 2} {'type': 'loss', 'content': 0.006269126199185848, 'timestamp': '2025-09-30 23:06:40.999982', 'step': 3698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.055696', 'step': 3698, 'epoch': 2} {'type': 'loss', 'content': 0.01706579141318798, 'timestamp': '2025-09-30 23:06:41.062778', 'step': 3699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.130794', 'step': 3699, 'epoch': 2} {'type': 'loss', 'content': 0.03764230012893677, 'timestamp': '2025-09-30 23:06:41.137243', 'step': 3700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.209889', 'step': 3700, 'epoch': 2} {'type': 'loss', 'content': 0.0092382887378335, 'timestamp': '2025-09-30 23:06:41.217133', 'step': 3701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.296830', 'step': 3701, 'epoch': 2} {'type': 'loss', 'content': 0.05066206678748131, 'timestamp': '2025-09-30 23:06:41.302311', 'step': 3702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:41.369358', 'step': 3702, 'epoch': 2} {'type': 'loss', 'content': 0.006532914005219936, 'timestamp': '2025-09-30 23:06:41.374276', 'step': 3703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.437676', 'step': 3703, 'epoch': 2} {'type': 'loss', 'content': 0.004836058709770441, 'timestamp': '2025-09-30 23:06:41.447671', 'step': 3704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.518689', 'step': 3704, 'epoch': 2} {'type': 'loss', 'content': 0.009742854163050652, 'timestamp': '2025-09-30 23:06:41.523591', 'step': 3705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.577036', 'step': 3705, 'epoch': 2} {'type': 'loss', 'content': 0.0210539810359478, 'timestamp': '2025-09-30 23:06:41.581625', 'step': 3706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.638178', 'step': 3706, 'epoch': 2} {'type': 'loss', 'content': 0.004805517848581076, 'timestamp': '2025-09-30 23:06:41.643366', 'step': 3707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.704232', 'step': 3707, 'epoch': 2} {'type': 'loss', 'content': 0.024895841255784035, 'timestamp': '2025-09-30 23:06:41.716713', 'step': 3708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.784035', 'step': 3708, 'epoch': 2} {'type': 'loss', 'content': 0.011068696156144142, 'timestamp': '2025-09-30 23:06:41.787372', 'step': 3709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.864194', 'step': 3709, 'epoch': 2} {'type': 'loss', 'content': 0.010008474811911583, 'timestamp': '2025-09-30 23:06:41.866791', 'step': 3710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:41.925741', 'step': 3710, 'epoch': 2} {'type': 'loss', 'content': 0.004797891713678837, 'timestamp': '2025-09-30 23:06:41.932823', 'step': 3711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:41.996076', 'step': 3711, 'epoch': 2} {'type': 'loss', 'content': 0.007526793982833624, 'timestamp': '2025-09-30 23:06:42.003430', 'step': 3712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.068092', 'step': 3712, 'epoch': 2} {'type': 'loss', 'content': 0.006566990166902542, 'timestamp': '2025-09-30 23:06:42.072802', 'step': 3713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.132774', 'step': 3713, 'epoch': 2} {'type': 'loss', 'content': 0.047697316855192184, 'timestamp': '2025-09-30 23:06:42.135837', 'step': 3714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.201444', 'step': 3714, 'epoch': 2} {'type': 'loss', 'content': 0.004836773965507746, 'timestamp': '2025-09-30 23:06:42.214476', 'step': 3715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.287106', 'step': 3715, 'epoch': 2} {'type': 'loss', 'content': 0.0026517589576542377, 'timestamp': '2025-09-30 23:06:42.295987', 'step': 3716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:06:42.361158', 'step': 3716, 'epoch': 2} {'type': 'loss', 'content': 0.003695851191878319, 'timestamp': '2025-09-30 23:06:42.366210', 'step': 3717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.427751', 'step': 3717, 'epoch': 2} {'type': 'loss', 'content': 0.016044633463025093, 'timestamp': '2025-09-30 23:06:42.434197', 'step': 3718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:42.501494', 'step': 3718, 'epoch': 2} {'type': 'loss', 'content': 0.02792280726134777, 'timestamp': '2025-09-30 23:06:42.504942', 'step': 3719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:42.568128', 'step': 3719, 'epoch': 2} {'type': 'loss', 'content': 0.010456285439431667, 'timestamp': '2025-09-30 23:06:42.574874', 'step': 3720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.636063', 'step': 3720, 'epoch': 2} {'type': 'loss', 'content': 0.01976151205599308, 'timestamp': '2025-09-30 23:06:42.643775', 'step': 3721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.717709', 'step': 3721, 'epoch': 2} {'type': 'loss', 'content': 0.03441311791539192, 'timestamp': '2025-09-30 23:06:42.724818', 'step': 3722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.792776', 'step': 3722, 'epoch': 2} {'type': 'loss', 'content': 0.0044487835839390755, 'timestamp': '2025-09-30 23:06:42.795895', 'step': 3723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.849946', 'step': 3723, 'epoch': 2} {'type': 'loss', 'content': 0.0015283755492419004, 'timestamp': '2025-09-30 23:06:42.861926', 'step': 3724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:42.921304', 'step': 3724, 'epoch': 2} {'type': 'loss', 'content': 0.03667603060603142, 'timestamp': '2025-09-30 23:06:42.926537', 'step': 3725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:42.988074', 'step': 3725, 'epoch': 2} {'type': 'loss', 'content': 0.028880516067147255, 'timestamp': '2025-09-30 23:06:42.993012', 'step': 3726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:43.067165', 'step': 3726, 'epoch': 2} {'type': 'loss', 'content': 0.03995535150170326, 'timestamp': '2025-09-30 23:06:43.079000', 'step': 3727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.151150', 'step': 3727, 'epoch': 2} {'type': 'loss', 'content': 0.049847472459077835, 'timestamp': '2025-09-30 23:06:43.161553', 'step': 3728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.221386', 'step': 3728, 'epoch': 2} {'type': 'loss', 'content': 0.004747297614812851, 'timestamp': '2025-09-30 23:06:43.227189', 'step': 3729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:43.291656', 'step': 3729, 'epoch': 2} {'type': 'loss', 'content': 0.06259100139141083, 'timestamp': '2025-09-30 23:06:43.300577', 'step': 3730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.369323', 'step': 3730, 'epoch': 2} {'type': 'loss', 'content': 0.04281577095389366, 'timestamp': '2025-09-30 23:06:43.374436', 'step': 3731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:43.443703', 'step': 3731, 'epoch': 2} {'type': 'loss', 'content': 0.020904015749692917, 'timestamp': '2025-09-30 23:06:43.450373', 'step': 3732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.511464', 'step': 3732, 'epoch': 2} {'type': 'loss', 'content': 0.011904587037861347, 'timestamp': '2025-09-30 23:06:43.515747', 'step': 3733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:43.582903', 'step': 3733, 'epoch': 2} {'type': 'loss', 'content': 0.004161340184509754, 'timestamp': '2025-09-30 23:06:43.592267', 'step': 3734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.679292', 'step': 3734, 'epoch': 2} {'type': 'loss', 'content': 0.013847628608345985, 'timestamp': '2025-09-30 23:06:43.691348', 'step': 3735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:43.771697', 'step': 3735, 'epoch': 2} {'type': 'loss', 'content': 0.005291007924824953, 'timestamp': '2025-09-30 23:06:43.780898', 'step': 3736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.841415', 'step': 3736, 'epoch': 2} {'type': 'loss', 'content': 0.01424913015216589, 'timestamp': '2025-09-30 23:06:43.848880', 'step': 3737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.921068', 'step': 3737, 'epoch': 2} {'type': 'loss', 'content': 0.021748213097453117, 'timestamp': '2025-09-30 23:06:43.925901', 'step': 3738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:43.989041', 'step': 3738, 'epoch': 2} {'type': 'loss', 'content': 0.0014115060912445188, 'timestamp': '2025-09-30 23:06:43.994316', 'step': 3739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:44.053218', 'step': 3739, 'epoch': 2} {'type': 'loss', 'content': 0.0012966615613549948, 'timestamp': '2025-09-30 23:06:44.062007', 'step': 3740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.122816', 'step': 3740, 'epoch': 2} {'type': 'loss', 'content': 0.0019588868599385023, 'timestamp': '2025-09-30 23:06:44.138106', 'step': 3741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.230629', 'step': 3741, 'epoch': 2} {'type': 'loss', 'content': 0.0012897460255771875, 'timestamp': '2025-09-30 23:06:44.240960', 'step': 3742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.310107', 'step': 3742, 'epoch': 2} {'type': 'loss', 'content': 0.003202429274097085, 'timestamp': '2025-09-30 23:06:44.316006', 'step': 3743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:44.371365', 'step': 3743, 'epoch': 2} {'type': 'loss', 'content': 0.007601547986268997, 'timestamp': '2025-09-30 23:06:44.379249', 'step': 3744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:44.437531', 'step': 3744, 'epoch': 2} {'type': 'loss', 'content': 0.06527835875749588, 'timestamp': '2025-09-30 23:06:44.439651', 'step': 3745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.494277', 'step': 3745, 'epoch': 2} {'type': 'loss', 'content': 0.0733853206038475, 'timestamp': '2025-09-30 23:06:44.497832', 'step': 3746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.555677', 'step': 3746, 'epoch': 2} {'type': 'loss', 'content': 0.008759655058383942, 'timestamp': '2025-09-30 23:06:44.562359', 'step': 3747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:44.628189', 'step': 3747, 'epoch': 2} {'type': 'loss', 'content': 0.0028844736516475677, 'timestamp': '2025-09-30 23:06:44.639280', 'step': 3748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:44.695423', 'step': 3748, 'epoch': 2} {'type': 'loss', 'content': 0.0025053229182958603, 'timestamp': '2025-09-30 23:06:44.702915', 'step': 3749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.773724', 'step': 3749, 'epoch': 2} {'type': 'loss', 'content': 0.011699942871928215, 'timestamp': '2025-09-30 23:06:44.778056', 'step': 3750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.836300', 'step': 3750, 'epoch': 2} {'type': 'loss', 'content': 0.005764603149145842, 'timestamp': '2025-09-30 23:06:44.839061', 'step': 3751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:44.895382', 'step': 3751, 'epoch': 2} {'type': 'loss', 'content': 0.021340716630220413, 'timestamp': '2025-09-30 23:06:44.901498', 'step': 3752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:44.959773', 'step': 3752, 'epoch': 2} {'type': 'loss', 'content': 0.0005865769344381988, 'timestamp': '2025-09-30 23:06:44.965250', 'step': 3753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.024126', 'step': 3753, 'epoch': 2} {'type': 'loss', 'content': 0.0017894022166728973, 'timestamp': '2025-09-30 23:06:45.027655', 'step': 3754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:45.085359', 'step': 3754, 'epoch': 2} {'type': 'loss', 'content': 0.00046843363088555634, 'timestamp': '2025-09-30 23:06:45.091864', 'step': 3755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.148819', 'step': 3755, 'epoch': 2} {'type': 'loss', 'content': 0.053765665739774704, 'timestamp': '2025-09-30 23:06:45.159335', 'step': 3756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.215615', 'step': 3756, 'epoch': 2} {'type': 'loss', 'content': 0.03146874159574509, 'timestamp': '2025-09-30 23:06:45.221596', 'step': 3757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.281858', 'step': 3757, 'epoch': 2} {'type': 'loss', 'content': 0.004202323034405708, 'timestamp': '2025-09-30 23:06:45.287536', 'step': 3758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:45.357526', 'step': 3758, 'epoch': 2} {'type': 'loss', 'content': 0.03823010250926018, 'timestamp': '2025-09-30 23:06:45.363052', 'step': 3759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.426687', 'step': 3759, 'epoch': 2} {'type': 'loss', 'content': 0.006785305682569742, 'timestamp': '2025-09-30 23:06:45.436546', 'step': 3760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.494860', 'step': 3760, 'epoch': 2} {'type': 'loss', 'content': 0.009731217287480831, 'timestamp': '2025-09-30 23:06:45.503728', 'step': 3761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.563128', 'step': 3761, 'epoch': 2} {'type': 'loss', 'content': 0.007696975022554398, 'timestamp': '2025-09-30 23:06:45.571343', 'step': 3762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.635200', 'step': 3762, 'epoch': 2} {'type': 'loss', 'content': 0.005093725863844156, 'timestamp': '2025-09-30 23:06:45.639117', 'step': 3763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:45.699078', 'step': 3763, 'epoch': 2} {'type': 'loss', 'content': 0.002014064695686102, 'timestamp': '2025-09-30 23:06:45.706033', 'step': 3764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.764045', 'step': 3764, 'epoch': 2} {'type': 'loss', 'content': 0.0320407934486866, 'timestamp': '2025-09-30 23:06:45.773306', 'step': 3765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.850638', 'step': 3765, 'epoch': 2} {'type': 'loss', 'content': 0.029801324009895325, 'timestamp': '2025-09-30 23:06:45.857275', 'step': 3766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.911910', 'step': 3766, 'epoch': 2} {'type': 'loss', 'content': 0.04913346841931343, 'timestamp': '2025-09-30 23:06:45.917825', 'step': 3767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:45.980190', 'step': 3767, 'epoch': 2} {'type': 'loss', 'content': 0.03929266333580017, 'timestamp': '2025-09-30 23:06:45.986356', 'step': 3768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.058880', 'step': 3768, 'epoch': 2} {'type': 'loss', 'content': 0.001826618448831141, 'timestamp': '2025-09-30 23:06:46.063045', 'step': 3769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:46.123775', 'step': 3769, 'epoch': 2} {'type': 'loss', 'content': 0.027799973264336586, 'timestamp': '2025-09-30 23:06:46.127527', 'step': 3770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.183711', 'step': 3770, 'epoch': 2} {'type': 'loss', 'content': 0.018036527559161186, 'timestamp': '2025-09-30 23:06:46.189924', 'step': 3771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.252625', 'step': 3771, 'epoch': 2} {'type': 'loss', 'content': 0.0350886769592762, 'timestamp': '2025-09-30 23:06:46.261536', 'step': 3772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.322345', 'step': 3772, 'epoch': 2} {'type': 'loss', 'content': 0.0077455672435462475, 'timestamp': '2025-09-30 23:06:46.325122', 'step': 3773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:46.379354', 'step': 3773, 'epoch': 2} {'type': 'loss', 'content': 0.034018952399492264, 'timestamp': '2025-09-30 23:06:46.388845', 'step': 3774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.450621', 'step': 3774, 'epoch': 2} {'type': 'loss', 'content': 0.04717526212334633, 'timestamp': '2025-09-30 23:06:46.457346', 'step': 3775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:46.519409', 'step': 3775, 'epoch': 2} {'type': 'loss', 'content': 0.0602630153298378, 'timestamp': '2025-09-30 23:06:46.525864', 'step': 3776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.583514', 'step': 3776, 'epoch': 2} {'type': 'loss', 'content': 0.05537250265479088, 'timestamp': '2025-09-30 23:06:46.588079', 'step': 3777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.642344', 'step': 3777, 'epoch': 2} {'type': 'loss', 'content': 0.010058033280074596, 'timestamp': '2025-09-30 23:06:46.647315', 'step': 3778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.709625', 'step': 3778, 'epoch': 2} {'type': 'loss', 'content': 0.011785500682890415, 'timestamp': '2025-09-30 23:06:46.719761', 'step': 3779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:46.799912', 'step': 3779, 'epoch': 2} {'type': 'loss', 'content': 0.03579826280474663, 'timestamp': '2025-09-30 23:06:46.812186', 'step': 3780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.876993', 'step': 3780, 'epoch': 2} {'type': 'loss', 'content': 0.00901669543236494, 'timestamp': '2025-09-30 23:06:46.882387', 'step': 3781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:46.942593', 'step': 3781, 'epoch': 2} {'type': 'loss', 'content': 0.05602555721998215, 'timestamp': '2025-09-30 23:06:46.948153', 'step': 3782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.006945', 'step': 3782, 'epoch': 2} {'type': 'loss', 'content': 0.0344758965075016, 'timestamp': '2025-09-30 23:06:47.010742', 'step': 3783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.067750', 'step': 3783, 'epoch': 2} {'type': 'loss', 'content': 0.02757844887673855, 'timestamp': '2025-09-30 23:06:47.076431', 'step': 3784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.142036', 'step': 3784, 'epoch': 2} {'type': 'loss', 'content': 0.017513206228613853, 'timestamp': '2025-09-30 23:06:47.145113', 'step': 3785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:47.218381', 'step': 3785, 'epoch': 2} {'type': 'loss', 'content': 0.0043951901607215405, 'timestamp': '2025-09-30 23:06:47.221484', 'step': 3786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.277097', 'step': 3786, 'epoch': 2} {'type': 'loss', 'content': 0.004695740062743425, 'timestamp': '2025-09-30 23:06:47.280366', 'step': 3787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.341301', 'step': 3787, 'epoch': 2} {'type': 'loss', 'content': 0.00703089265152812, 'timestamp': '2025-09-30 23:06:47.347826', 'step': 3788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:47.402711', 'step': 3788, 'epoch': 2} {'type': 'loss', 'content': 0.008212202228605747, 'timestamp': '2025-09-30 23:06:47.411440', 'step': 3789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.480627', 'step': 3789, 'epoch': 2} {'type': 'loss', 'content': 0.01356283575296402, 'timestamp': '2025-09-30 23:06:47.485149', 'step': 3790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.548274', 'step': 3790, 'epoch': 2} {'type': 'loss', 'content': 0.01966235227882862, 'timestamp': '2025-09-30 23:06:47.551303', 'step': 3791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.619772', 'step': 3791, 'epoch': 2} {'type': 'loss', 'content': 0.034837786108255386, 'timestamp': '2025-09-30 23:06:47.632021', 'step': 3792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:47.699786', 'step': 3792, 'epoch': 2} {'type': 'loss', 'content': 0.009445003233850002, 'timestamp': '2025-09-30 23:06:47.711276', 'step': 3793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.773612', 'step': 3793, 'epoch': 2} {'type': 'loss', 'content': 0.01823047362267971, 'timestamp': '2025-09-30 23:06:47.778852', 'step': 3794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.845285', 'step': 3794, 'epoch': 2} {'type': 'loss', 'content': 0.009119567461311817, 'timestamp': '2025-09-30 23:06:47.851775', 'step': 3795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:47.910915', 'step': 3795, 'epoch': 2} {'type': 'loss', 'content': 0.0014342659851536155, 'timestamp': '2025-09-30 23:06:47.919047', 'step': 3796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:47.982090', 'step': 3796, 'epoch': 2} {'type': 'loss', 'content': 0.043474048376083374, 'timestamp': '2025-09-30 23:06:47.985068', 'step': 3797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:06:48.041474', 'step': 3797, 'epoch': 2} {'type': 'loss', 'content': 0.00802943017333746, 'timestamp': '2025-09-30 23:06:48.044390', 'step': 3798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:48.102931', 'step': 3798, 'epoch': 2} {'type': 'loss', 'content': 0.009069249965250492, 'timestamp': '2025-09-30 23:06:48.110790', 'step': 3799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:48.175026', 'step': 3799, 'epoch': 2} {'type': 'loss', 'content': 0.004523504991084337, 'timestamp': '2025-09-30 23:06:48.182194', 'step': 3800, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:06:52.177230', 'step': 3800, 'epoch': 2} {'type': 'pplx', 'content': 8224026.2664651, 'timestamp': '2025-09-30 23:06:52.180778', 'step': 3800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.237520', 'step': 3800, 'epoch': 2} {'type': 'loss', 'content': 0.015963811427354813, 'timestamp': '2025-09-30 23:06:52.241296', 'step': 3801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.298631', 'step': 3801, 'epoch': 2} {'type': 'loss', 'content': 0.016270479187369347, 'timestamp': '2025-09-30 23:06:52.302269', 'step': 3802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:52.358520', 'step': 3802, 'epoch': 2} {'type': 'loss', 'content': 0.03429587930440903, 'timestamp': '2025-09-30 23:06:52.361943', 'step': 3803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.420575', 'step': 3803, 'epoch': 2} {'type': 'loss', 'content': 0.014085431583225727, 'timestamp': '2025-09-30 23:06:52.427797', 'step': 3804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.496341', 'step': 3804, 'epoch': 2} {'type': 'loss', 'content': 0.05672192573547363, 'timestamp': '2025-09-30 23:06:52.498281', 'step': 3805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.556138', 'step': 3805, 'epoch': 2} {'type': 'loss', 'content': 0.016132446005940437, 'timestamp': '2025-09-30 23:06:52.559335', 'step': 3806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:52.614972', 'step': 3806, 'epoch': 2} {'type': 'loss', 'content': 0.008458097465336323, 'timestamp': '2025-09-30 23:06:52.619042', 'step': 3807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.677468', 'step': 3807, 'epoch': 2} {'type': 'loss', 'content': 0.033504072576761246, 'timestamp': '2025-09-30 23:06:52.684052', 'step': 3808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.739888', 'step': 3808, 'epoch': 2} {'type': 'loss', 'content': 0.028861569240689278, 'timestamp': '2025-09-30 23:06:52.742377', 'step': 3809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.796650', 'step': 3809, 'epoch': 2} {'type': 'loss', 'content': 0.006238930858671665, 'timestamp': '2025-09-30 23:06:52.799339', 'step': 3810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.852943', 'step': 3810, 'epoch': 2} {'type': 'loss', 'content': 0.0098000792786479, 'timestamp': '2025-09-30 23:06:52.855752', 'step': 3811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.911404', 'step': 3811, 'epoch': 2} {'type': 'loss', 'content': 0.015877695754170418, 'timestamp': '2025-09-30 23:06:52.917370', 'step': 3812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:52.970172', 'step': 3812, 'epoch': 2} {'type': 'loss', 'content': 0.03119598515331745, 'timestamp': '2025-09-30 23:06:52.979500', 'step': 3813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.043409', 'step': 3813, 'epoch': 2} {'type': 'loss', 'content': 0.03904598951339722, 'timestamp': '2025-09-30 23:06:53.046043', 'step': 3814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.101399', 'step': 3814, 'epoch': 2} {'type': 'loss', 'content': 0.009507318027317524, 'timestamp': '2025-09-30 23:06:53.104149', 'step': 3815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:53.157826', 'step': 3815, 'epoch': 2} {'type': 'loss', 'content': 0.08480391651391983, 'timestamp': '2025-09-30 23:06:53.163739', 'step': 3816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.218841', 'step': 3816, 'epoch': 2} {'type': 'loss', 'content': 0.05882308632135391, 'timestamp': '2025-09-30 23:06:53.222111', 'step': 3817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.288248', 'step': 3817, 'epoch': 2} {'type': 'loss', 'content': 0.019994014874100685, 'timestamp': '2025-09-30 23:06:53.291353', 'step': 3818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.347752', 'step': 3818, 'epoch': 2} {'type': 'loss', 'content': 0.016905685886740685, 'timestamp': '2025-09-30 23:06:53.350121', 'step': 3819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.404715', 'step': 3819, 'epoch': 2} {'type': 'loss', 'content': 0.005194817669689655, 'timestamp': '2025-09-30 23:06:53.419697', 'step': 3820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.472550', 'step': 3820, 'epoch': 2} {'type': 'loss', 'content': 0.01618771068751812, 'timestamp': '2025-09-30 23:06:53.475826', 'step': 3821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:53.531161', 'step': 3821, 'epoch': 2} {'type': 'loss', 'content': 0.016485711559653282, 'timestamp': '2025-09-30 23:06:53.533507', 'step': 3822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.587578', 'step': 3822, 'epoch': 2} {'type': 'loss', 'content': 0.007147447671741247, 'timestamp': '2025-09-30 23:06:53.590136', 'step': 3823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.644006', 'step': 3823, 'epoch': 2} {'type': 'loss', 'content': 0.012076713144779205, 'timestamp': '2025-09-30 23:06:53.649995', 'step': 3824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.704623', 'step': 3824, 'epoch': 2} {'type': 'loss', 'content': 0.004703056067228317, 'timestamp': '2025-09-30 23:06:53.707311', 'step': 3825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.760671', 'step': 3825, 'epoch': 2} {'type': 'loss', 'content': 0.007478112820535898, 'timestamp': '2025-09-30 23:06:53.763223', 'step': 3826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.816865', 'step': 3826, 'epoch': 2} {'type': 'loss', 'content': 0.011494643986225128, 'timestamp': '2025-09-30 23:06:53.818911', 'step': 3827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.871564', 'step': 3827, 'epoch': 2} {'type': 'loss', 'content': 0.015279280953109264, 'timestamp': '2025-09-30 23:06:53.877814', 'step': 3828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.938686', 'step': 3828, 'epoch': 2} {'type': 'loss', 'content': 0.03386322408914566, 'timestamp': '2025-09-30 23:06:53.941236', 'step': 3829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:53.994897', 'step': 3829, 'epoch': 2} {'type': 'loss', 'content': 0.002085780492052436, 'timestamp': '2025-09-30 23:06:53.997254', 'step': 3830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.050928', 'step': 3830, 'epoch': 2} {'type': 'loss', 'content': 0.001736610778607428, 'timestamp': '2025-09-30 23:06:54.054551', 'step': 3831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:54.107543', 'step': 3831, 'epoch': 2} {'type': 'loss', 'content': 0.012113498523831367, 'timestamp': '2025-09-30 23:06:54.113507', 'step': 3832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.171146', 'step': 3832, 'epoch': 2} {'type': 'loss', 'content': 0.0024852559436112642, 'timestamp': '2025-09-30 23:06:54.173439', 'step': 3833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.226548', 'step': 3833, 'epoch': 2} {'type': 'loss', 'content': 0.017744893208146095, 'timestamp': '2025-09-30 23:06:54.229876', 'step': 3834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.289758', 'step': 3834, 'epoch': 2} {'type': 'loss', 'content': 0.014992482960224152, 'timestamp': '2025-09-30 23:06:54.291964', 'step': 3835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.346780', 'step': 3835, 'epoch': 2} {'type': 'loss', 'content': 0.001151731121353805, 'timestamp': '2025-09-30 23:06:54.357380', 'step': 3836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.412439', 'step': 3836, 'epoch': 2} {'type': 'loss', 'content': 0.0089108319953084, 'timestamp': '2025-09-30 23:06:54.416512', 'step': 3837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:54.472006', 'step': 3837, 'epoch': 2} {'type': 'loss', 'content': 0.02689167857170105, 'timestamp': '2025-09-30 23:06:54.474726', 'step': 3838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.528388', 'step': 3838, 'epoch': 2} {'type': 'loss', 'content': 0.019114868715405464, 'timestamp': '2025-09-30 23:06:54.534913', 'step': 3839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.621524', 'step': 3839, 'epoch': 2} {'type': 'loss', 'content': 0.012741826474666595, 'timestamp': '2025-09-30 23:06:54.630585', 'step': 3840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.689617', 'step': 3840, 'epoch': 2} {'type': 'loss', 'content': 0.011184520088136196, 'timestamp': '2025-09-30 23:06:54.696087', 'step': 3841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:54.753860', 'step': 3841, 'epoch': 2} {'type': 'loss', 'content': 0.002391861518844962, 'timestamp': '2025-09-30 23:06:54.758220', 'step': 3842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.831667', 'step': 3842, 'epoch': 2} {'type': 'loss', 'content': 0.01190215814858675, 'timestamp': '2025-09-30 23:06:54.834355', 'step': 3843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:54.900898', 'step': 3843, 'epoch': 2} {'type': 'loss', 'content': 0.015081196092069149, 'timestamp': '2025-09-30 23:06:54.912275', 'step': 3844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:54.976939', 'step': 3844, 'epoch': 2} {'type': 'loss', 'content': 0.026228424161672592, 'timestamp': '2025-09-30 23:06:54.981608', 'step': 3845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.045012', 'step': 3845, 'epoch': 2} {'type': 'loss', 'content': 0.0019233705243095756, 'timestamp': '2025-09-30 23:06:55.054105', 'step': 3846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.125685', 'step': 3846, 'epoch': 2} {'type': 'loss', 'content': 0.04677456617355347, 'timestamp': '2025-09-30 23:06:55.134821', 'step': 3847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.212548', 'step': 3847, 'epoch': 2} {'type': 'loss', 'content': 0.0009800647385418415, 'timestamp': '2025-09-30 23:06:55.219537', 'step': 3848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:55.289772', 'step': 3848, 'epoch': 2} {'type': 'loss', 'content': 0.013316050171852112, 'timestamp': '2025-09-30 23:06:55.292760', 'step': 3849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.366975', 'step': 3849, 'epoch': 2} {'type': 'loss', 'content': 0.030226802453398705, 'timestamp': '2025-09-30 23:06:55.370815', 'step': 3850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.442671', 'step': 3850, 'epoch': 2} {'type': 'loss', 'content': 0.022257162258028984, 'timestamp': '2025-09-30 23:06:55.450323', 'step': 3851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.516913', 'step': 3851, 'epoch': 2} {'type': 'loss', 'content': 0.0036283000372350216, 'timestamp': '2025-09-30 23:06:55.533082', 'step': 3852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.603290', 'step': 3852, 'epoch': 2} {'type': 'loss', 'content': 0.01238669641315937, 'timestamp': '2025-09-30 23:06:55.609986', 'step': 3853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.677287', 'step': 3853, 'epoch': 2} {'type': 'loss', 'content': 0.009724738076329231, 'timestamp': '2025-09-30 23:06:55.684206', 'step': 3854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.746654', 'step': 3854, 'epoch': 2} {'type': 'loss', 'content': 0.002192980842664838, 'timestamp': '2025-09-30 23:06:55.754609', 'step': 3855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.814429', 'step': 3855, 'epoch': 2} {'type': 'loss', 'content': 0.00474701588973403, 'timestamp': '2025-09-30 23:06:55.824422', 'step': 3856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.893506', 'step': 3856, 'epoch': 2} {'type': 'loss', 'content': 0.0042966692708432674, 'timestamp': '2025-09-30 23:06:55.896585', 'step': 3857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:55.969890', 'step': 3857, 'epoch': 2} {'type': 'loss', 'content': 0.0036735523026436567, 'timestamp': '2025-09-30 23:06:55.976463', 'step': 3858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:56.043598', 'step': 3858, 'epoch': 2} {'type': 'loss', 'content': 0.05326366052031517, 'timestamp': '2025-09-30 23:06:56.052611', 'step': 3859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.117116', 'step': 3859, 'epoch': 2} {'type': 'loss', 'content': 0.0065001980401575565, 'timestamp': '2025-09-30 23:06:56.129249', 'step': 3860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.194510', 'step': 3860, 'epoch': 2} {'type': 'loss', 'content': 0.018028350546956062, 'timestamp': '2025-09-30 23:06:56.200241', 'step': 3861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.270119', 'step': 3861, 'epoch': 2} {'type': 'loss', 'content': 0.03905479982495308, 'timestamp': '2025-09-30 23:06:56.276692', 'step': 3862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.340817', 'step': 3862, 'epoch': 2} {'type': 'loss', 'content': 0.024974163621664047, 'timestamp': '2025-09-30 23:06:56.347470', 'step': 3863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.410272', 'step': 3863, 'epoch': 2} {'type': 'loss', 'content': 0.00996048841625452, 'timestamp': '2025-09-30 23:06:56.417239', 'step': 3864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.475948', 'step': 3864, 'epoch': 2} {'type': 'loss', 'content': 0.007806436624377966, 'timestamp': '2025-09-30 23:06:56.478824', 'step': 3865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:56.538600', 'step': 3865, 'epoch': 2} {'type': 'loss', 'content': 0.01901497133076191, 'timestamp': '2025-09-30 23:06:56.543971', 'step': 3866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.608584', 'step': 3866, 'epoch': 2} {'type': 'loss', 'content': 0.0007056361064314842, 'timestamp': '2025-09-30 23:06:56.616828', 'step': 3867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.684156', 'step': 3867, 'epoch': 2} {'type': 'loss', 'content': 0.0014328593388199806, 'timestamp': '2025-09-30 23:06:56.695251', 'step': 3868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.750618', 'step': 3868, 'epoch': 2} {'type': 'loss', 'content': 0.038042325526475906, 'timestamp': '2025-09-30 23:06:56.753269', 'step': 3869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.818662', 'step': 3869, 'epoch': 2} {'type': 'loss', 'content': 0.017401020973920822, 'timestamp': '2025-09-30 23:06:56.826580', 'step': 3870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.887917', 'step': 3870, 'epoch': 2} {'type': 'loss', 'content': 0.005358551163226366, 'timestamp': '2025-09-30 23:06:56.891333', 'step': 3871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:56.956940', 'step': 3871, 'epoch': 2} {'type': 'loss', 'content': 0.002235525520518422, 'timestamp': '2025-09-30 23:06:56.969357', 'step': 3872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.037288', 'step': 3872, 'epoch': 2} {'type': 'loss', 'content': 0.001188346417620778, 'timestamp': '2025-09-30 23:06:57.045871', 'step': 3873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.119921', 'step': 3873, 'epoch': 2} {'type': 'loss', 'content': 0.00030629904358647764, 'timestamp': '2025-09-30 23:06:57.125675', 'step': 3874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.189594', 'step': 3874, 'epoch': 2} {'type': 'loss', 'content': 0.0005544617888517678, 'timestamp': '2025-09-30 23:06:57.197598', 'step': 3875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.256987', 'step': 3875, 'epoch': 2} {'type': 'loss', 'content': 0.016752896830439568, 'timestamp': '2025-09-30 23:06:57.263939', 'step': 3876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:57.327681', 'step': 3876, 'epoch': 2} {'type': 'loss', 'content': 0.006289011798799038, 'timestamp': '2025-09-30 23:06:57.334302', 'step': 3877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.397373', 'step': 3877, 'epoch': 2} {'type': 'loss', 'content': 0.05260927230119705, 'timestamp': '2025-09-30 23:06:57.404716', 'step': 3878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.471949', 'step': 3878, 'epoch': 2} {'type': 'loss', 'content': 0.06915638595819473, 'timestamp': '2025-09-30 23:06:57.476827', 'step': 3879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.538340', 'step': 3879, 'epoch': 2} {'type': 'loss', 'content': 0.000860927568282932, 'timestamp': '2025-09-30 23:06:57.545444', 'step': 3880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.605020', 'step': 3880, 'epoch': 2} {'type': 'loss', 'content': 0.07697702199220657, 'timestamp': '2025-09-30 23:06:57.609614', 'step': 3881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:57.675546', 'step': 3881, 'epoch': 2} {'type': 'loss', 'content': 0.0016202522674575448, 'timestamp': '2025-09-30 23:06:57.681449', 'step': 3882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:57.746530', 'step': 3882, 'epoch': 2} {'type': 'loss', 'content': 0.016801271587610245, 'timestamp': '2025-09-30 23:06:57.753113', 'step': 3883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.818435', 'step': 3883, 'epoch': 2} {'type': 'loss', 'content': 0.0022621459793299437, 'timestamp': '2025-09-30 23:06:57.829148', 'step': 3884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.891462', 'step': 3884, 'epoch': 2} {'type': 'loss', 'content': 0.06608432531356812, 'timestamp': '2025-09-30 23:06:57.899185', 'step': 3885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:57.963255', 'step': 3885, 'epoch': 2} {'type': 'loss', 'content': 0.004168263636529446, 'timestamp': '2025-09-30 23:06:57.966997', 'step': 3886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.036276', 'step': 3886, 'epoch': 2} {'type': 'loss', 'content': 0.005262276623398066, 'timestamp': '2025-09-30 23:06:58.048215', 'step': 3887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.112361', 'step': 3887, 'epoch': 2} {'type': 'loss', 'content': 0.0006800551782362163, 'timestamp': '2025-09-30 23:06:58.118596', 'step': 3888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.188322', 'step': 3888, 'epoch': 2} {'type': 'loss', 'content': 0.0009501866297796369, 'timestamp': '2025-09-30 23:06:58.193343', 'step': 3889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.292261', 'step': 3889, 'epoch': 2} {'type': 'loss', 'content': 0.004328863229602575, 'timestamp': '2025-09-30 23:06:58.301503', 'step': 3890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.365651', 'step': 3890, 'epoch': 2} {'type': 'loss', 'content': 0.00434829480946064, 'timestamp': '2025-09-30 23:06:58.380603', 'step': 3891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:58.439686', 'step': 3891, 'epoch': 2} {'type': 'loss', 'content': 0.008894586004316807, 'timestamp': '2025-09-30 23:06:58.450766', 'step': 3892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.525761', 'step': 3892, 'epoch': 2} {'type': 'loss', 'content': 0.029572227969765663, 'timestamp': '2025-09-30 23:06:58.537129', 'step': 3893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.603640', 'step': 3893, 'epoch': 2} {'type': 'loss', 'content': 0.03956888988614082, 'timestamp': '2025-09-30 23:06:58.609119', 'step': 3894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.670420', 'step': 3894, 'epoch': 2} {'type': 'loss', 'content': 0.017053838819265366, 'timestamp': '2025-09-30 23:06:58.675194', 'step': 3895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.748087', 'step': 3895, 'epoch': 2} {'type': 'loss', 'content': 0.004041049629449844, 'timestamp': '2025-09-30 23:06:58.759726', 'step': 3896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:58.832376', 'step': 3896, 'epoch': 2} {'type': 'loss', 'content': 0.0026793465949594975, 'timestamp': '2025-09-30 23:06:58.843447', 'step': 3897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:58.926622', 'step': 3897, 'epoch': 2} {'type': 'loss', 'content': 0.051391128450632095, 'timestamp': '2025-09-30 23:06:58.937571', 'step': 3898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.007755', 'step': 3898, 'epoch': 2} {'type': 'loss', 'content': 0.003116082400083542, 'timestamp': '2025-09-30 23:06:59.016251', 'step': 3899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.087621', 'step': 3899, 'epoch': 2} {'type': 'loss', 'content': 0.001659535220824182, 'timestamp': '2025-09-30 23:06:59.101537', 'step': 3900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.179359', 'step': 3900, 'epoch': 2} {'type': 'loss', 'content': 0.0010797708528116345, 'timestamp': '2025-09-30 23:06:59.189035', 'step': 3901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:06:59.263061', 'step': 3901, 'epoch': 2} {'type': 'loss', 'content': 0.04858729988336563, 'timestamp': '2025-09-30 23:06:59.270356', 'step': 3902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.343574', 'step': 3902, 'epoch': 2} {'type': 'loss', 'content': 0.031155556440353394, 'timestamp': '2025-09-30 23:06:59.349767', 'step': 3903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.418715', 'step': 3903, 'epoch': 2} {'type': 'loss', 'content': 0.008126089349389076, 'timestamp': '2025-09-30 23:06:59.428901', 'step': 3904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.492330', 'step': 3904, 'epoch': 2} {'type': 'loss', 'content': 0.0013170819729566574, 'timestamp': '2025-09-30 23:06:59.498284', 'step': 3905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.561713', 'step': 3905, 'epoch': 2} {'type': 'loss', 'content': 0.01872302033007145, 'timestamp': '2025-09-30 23:06:59.567782', 'step': 3906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.633534', 'step': 3906, 'epoch': 2} {'type': 'loss', 'content': 0.00590894790366292, 'timestamp': '2025-09-30 23:06:59.641114', 'step': 3907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:06:59.707638', 'step': 3907, 'epoch': 2} {'type': 'loss', 'content': 0.08771926164627075, 'timestamp': '2025-09-30 23:06:59.718608', 'step': 3908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.784172', 'step': 3908, 'epoch': 2} {'type': 'loss', 'content': 0.02422463521361351, 'timestamp': '2025-09-30 23:06:59.793029', 'step': 3909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.863767', 'step': 3909, 'epoch': 2} {'type': 'loss', 'content': 0.015828024595975876, 'timestamp': '2025-09-30 23:06:59.872323', 'step': 3910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:06:59.949653', 'step': 3910, 'epoch': 2} {'type': 'loss', 'content': 0.018931491300463676, 'timestamp': '2025-09-30 23:06:59.956977', 'step': 3911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:00.036126', 'step': 3911, 'epoch': 2} {'type': 'loss', 'content': 0.006172641646116972, 'timestamp': '2025-09-30 23:07:00.050849', 'step': 3912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.117695', 'step': 3912, 'epoch': 2} {'type': 'loss', 'content': 0.0054049259051680565, 'timestamp': '2025-09-30 23:07:00.128717', 'step': 3913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:00.203278', 'step': 3913, 'epoch': 2} {'type': 'loss', 'content': 0.014966612681746483, 'timestamp': '2025-09-30 23:07:00.209024', 'step': 3914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.274671', 'step': 3914, 'epoch': 2} {'type': 'loss', 'content': 0.005238188896328211, 'timestamp': '2025-09-30 23:07:00.280396', 'step': 3915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.358473', 'step': 3915, 'epoch': 2} {'type': 'loss', 'content': 0.02569735050201416, 'timestamp': '2025-09-30 23:07:00.370703', 'step': 3916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.440626', 'step': 3916, 'epoch': 2} {'type': 'loss', 'content': 0.0012850059429183602, 'timestamp': '2025-09-30 23:07:00.447510', 'step': 3917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.512916', 'step': 3917, 'epoch': 2} {'type': 'loss', 'content': 0.009556901641190052, 'timestamp': '2025-09-30 23:07:00.520838', 'step': 3918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.591412', 'step': 3918, 'epoch': 2} {'type': 'loss', 'content': 0.001371346996165812, 'timestamp': '2025-09-30 23:07:00.601049', 'step': 3919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:00.681686', 'step': 3919, 'epoch': 2} {'type': 'loss', 'content': 0.036630209535360336, 'timestamp': '2025-09-30 23:07:00.701566', 'step': 3920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.795994', 'step': 3920, 'epoch': 2} {'type': 'loss', 'content': 0.040694862604141235, 'timestamp': '2025-09-30 23:07:00.803875', 'step': 3921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.893374', 'step': 3921, 'epoch': 2} {'type': 'loss', 'content': 0.051167067140340805, 'timestamp': '2025-09-30 23:07:00.913594', 'step': 3922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:00.977273', 'step': 3922, 'epoch': 2} {'type': 'loss', 'content': 0.006322433706372976, 'timestamp': '2025-09-30 23:07:00.987998', 'step': 3923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:01.057367', 'step': 3923, 'epoch': 2} {'type': 'loss', 'content': 0.008118086494505405, 'timestamp': '2025-09-30 23:07:01.069958', 'step': 3924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.142720', 'step': 3924, 'epoch': 2} {'type': 'loss', 'content': 0.010999825783073902, 'timestamp': '2025-09-30 23:07:01.149610', 'step': 3925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.220440', 'step': 3925, 'epoch': 2} {'type': 'loss', 'content': 0.022524813190102577, 'timestamp': '2025-09-30 23:07:01.232229', 'step': 3926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.298916', 'step': 3926, 'epoch': 2} {'type': 'loss', 'content': 0.042803265154361725, 'timestamp': '2025-09-30 23:07:01.301599', 'step': 3927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:01.372243', 'step': 3927, 'epoch': 2} {'type': 'loss', 'content': 0.039035554975271225, 'timestamp': '2025-09-30 23:07:01.383912', 'step': 3928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.462503', 'step': 3928, 'epoch': 2} {'type': 'loss', 'content': 0.02143414132297039, 'timestamp': '2025-09-30 23:07:01.469627', 'step': 3929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.543390', 'step': 3929, 'epoch': 2} {'type': 'loss', 'content': 0.0176884476095438, 'timestamp': '2025-09-30 23:07:01.551813', 'step': 3930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.613738', 'step': 3930, 'epoch': 2} {'type': 'loss', 'content': 0.003998689819127321, 'timestamp': '2025-09-30 23:07:01.619842', 'step': 3931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.689262', 'step': 3931, 'epoch': 2} {'type': 'loss', 'content': 0.00542555982246995, 'timestamp': '2025-09-30 23:07:01.701372', 'step': 3932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.767730', 'step': 3932, 'epoch': 2} {'type': 'loss', 'content': 0.020474713295698166, 'timestamp': '2025-09-30 23:07:01.777228', 'step': 3933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:01.844414', 'step': 3933, 'epoch': 2} {'type': 'loss', 'content': 0.02280432917177677, 'timestamp': '2025-09-30 23:07:01.850347', 'step': 3934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.909149', 'step': 3934, 'epoch': 2} {'type': 'loss', 'content': 0.014567917212843895, 'timestamp': '2025-09-30 23:07:01.918509', 'step': 3935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:01.984898', 'step': 3935, 'epoch': 2} {'type': 'loss', 'content': 0.008305520750582218, 'timestamp': '2025-09-30 23:07:01.993742', 'step': 3936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.061572', 'step': 3936, 'epoch': 2} {'type': 'loss', 'content': 0.008170372806489468, 'timestamp': '2025-09-30 23:07:02.071269', 'step': 3937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.140986', 'step': 3937, 'epoch': 2} {'type': 'loss', 'content': 0.010923058725893497, 'timestamp': '2025-09-30 23:07:02.146872', 'step': 3938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.213136', 'step': 3938, 'epoch': 2} {'type': 'loss', 'content': 0.006785296369343996, 'timestamp': '2025-09-30 23:07:02.216641', 'step': 3939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.279231', 'step': 3939, 'epoch': 2} {'type': 'loss', 'content': 0.016043109819293022, 'timestamp': '2025-09-30 23:07:02.290662', 'step': 3940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.363198', 'step': 3940, 'epoch': 2} {'type': 'loss', 'content': 0.018462806940078735, 'timestamp': '2025-09-30 23:07:02.373722', 'step': 3941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.445111', 'step': 3941, 'epoch': 2} {'type': 'loss', 'content': 0.006088082678616047, 'timestamp': '2025-09-30 23:07:02.448776', 'step': 3942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.517747', 'step': 3942, 'epoch': 2} {'type': 'loss', 'content': 0.017666086554527283, 'timestamp': '2025-09-30 23:07:02.521407', 'step': 3943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.589466', 'step': 3943, 'epoch': 2} {'type': 'loss', 'content': 0.006479550618678331, 'timestamp': '2025-09-30 23:07:02.597704', 'step': 3944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.667609', 'step': 3944, 'epoch': 2} {'type': 'loss', 'content': 0.023752087727189064, 'timestamp': '2025-09-30 23:07:02.674871', 'step': 3945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.744087', 'step': 3945, 'epoch': 2} {'type': 'loss', 'content': 0.05027903988957405, 'timestamp': '2025-09-30 23:07:02.750738', 'step': 3946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:02.818064', 'step': 3946, 'epoch': 2} {'type': 'loss', 'content': 0.034060366451740265, 'timestamp': '2025-09-30 23:07:02.825779', 'step': 3947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.895141', 'step': 3947, 'epoch': 2} {'type': 'loss', 'content': 0.028967110440135002, 'timestamp': '2025-09-30 23:07:02.901681', 'step': 3948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:02.969291', 'step': 3948, 'epoch': 2} {'type': 'loss', 'content': 0.057537224143743515, 'timestamp': '2025-09-30 23:07:02.980839', 'step': 3949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:03.052730', 'step': 3949, 'epoch': 2} {'type': 'loss', 'content': 0.04366103559732437, 'timestamp': '2025-09-30 23:07:03.061534', 'step': 3950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:03.136132', 'step': 3950, 'epoch': 2} {'type': 'loss', 'content': 0.006625206209719181, 'timestamp': '2025-09-30 23:07:03.143445', 'step': 3951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:03.206057', 'step': 3951, 'epoch': 2} {'type': 'loss', 'content': 0.003002975368872285, 'timestamp': '2025-09-30 23:07:03.213255', 'step': 3952, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:07:08.089567', 'step': 3952, 'epoch': 2} {'type': 'pplx', 'content': 7514878.996089608, 'timestamp': '2025-09-30 23:07:08.099803', 'step': 3952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.161388', 'step': 3952, 'epoch': 2} {'type': 'loss', 'content': 0.0035750146489590406, 'timestamp': '2025-09-30 23:07:08.169638', 'step': 3953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.239231', 'step': 3953, 'epoch': 2} {'type': 'loss', 'content': 0.00628283154219389, 'timestamp': '2025-09-30 23:07:08.242090', 'step': 3954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.312688', 'step': 3954, 'epoch': 2} {'type': 'loss', 'content': 0.012447274290025234, 'timestamp': '2025-09-30 23:07:08.319978', 'step': 3955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.388653', 'step': 3955, 'epoch': 2} {'type': 'loss', 'content': 0.023303093388676643, 'timestamp': '2025-09-30 23:07:08.399290', 'step': 3956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.469796', 'step': 3956, 'epoch': 2} {'type': 'loss', 'content': 0.020457271486520767, 'timestamp': '2025-09-30 23:07:08.477739', 'step': 3957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.541061', 'step': 3957, 'epoch': 2} {'type': 'loss', 'content': 0.011199390515685081, 'timestamp': '2025-09-30 23:07:08.544482', 'step': 3958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:08.608955', 'step': 3958, 'epoch': 2} {'type': 'loss', 'content': 0.004356909543275833, 'timestamp': '2025-09-30 23:07:08.611916', 'step': 3959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.683251', 'step': 3959, 'epoch': 2} {'type': 'loss', 'content': 0.01156661007553339, 'timestamp': '2025-09-30 23:07:08.692956', 'step': 3960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.757126', 'step': 3960, 'epoch': 2} {'type': 'loss', 'content': 0.014294360764324665, 'timestamp': '2025-09-30 23:07:08.759892', 'step': 3961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.818233', 'step': 3961, 'epoch': 2} {'type': 'loss', 'content': 0.06156846880912781, 'timestamp': '2025-09-30 23:07:08.820880', 'step': 3962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.884370', 'step': 3962, 'epoch': 2} {'type': 'loss', 'content': 0.033309634774923325, 'timestamp': '2025-09-30 23:07:08.888261', 'step': 3963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:08.956141', 'step': 3963, 'epoch': 2} {'type': 'loss', 'content': 0.04670867696404457, 'timestamp': '2025-09-30 23:07:08.969139', 'step': 3964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:09.030004', 'step': 3964, 'epoch': 2} {'type': 'loss', 'content': 0.010872864164412022, 'timestamp': '2025-09-30 23:07:09.039931', 'step': 3965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.113567', 'step': 3965, 'epoch': 2} {'type': 'loss', 'content': 0.0019459592876955867, 'timestamp': '2025-09-30 23:07:09.121384', 'step': 3966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.185486', 'step': 3966, 'epoch': 2} {'type': 'loss', 'content': 0.021640175953507423, 'timestamp': '2025-09-30 23:07:09.193727', 'step': 3967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:09.265434', 'step': 3967, 'epoch': 2} {'type': 'loss', 'content': 0.032494597136974335, 'timestamp': '2025-09-30 23:07:09.278563', 'step': 3968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.353354', 'step': 3968, 'epoch': 2} {'type': 'loss', 'content': 0.00529340747743845, 'timestamp': '2025-09-30 23:07:09.358390', 'step': 3969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.432385', 'step': 3969, 'epoch': 2} {'type': 'loss', 'content': 0.020148994401097298, 'timestamp': '2025-09-30 23:07:09.443384', 'step': 3970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.517489', 'step': 3970, 'epoch': 2} {'type': 'loss', 'content': 0.00896134227514267, 'timestamp': '2025-09-30 23:07:09.526828', 'step': 3971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.598219', 'step': 3971, 'epoch': 2} {'type': 'loss', 'content': 0.008007441647350788, 'timestamp': '2025-09-30 23:07:09.605641', 'step': 3972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.668954', 'step': 3972, 'epoch': 2} {'type': 'loss', 'content': 0.012399506755173206, 'timestamp': '2025-09-30 23:07:09.674543', 'step': 3973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.745537', 'step': 3973, 'epoch': 2} {'type': 'loss', 'content': 0.004046719986945391, 'timestamp': '2025-09-30 23:07:09.756593', 'step': 3974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:09.820470', 'step': 3974, 'epoch': 2} {'type': 'loss', 'content': 0.02900114469230175, 'timestamp': '2025-09-30 23:07:09.827146', 'step': 3975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:09.893657', 'step': 3975, 'epoch': 2} {'type': 'loss', 'content': 0.014833853580057621, 'timestamp': '2025-09-30 23:07:09.904909', 'step': 3976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:09.976251', 'step': 3976, 'epoch': 2} {'type': 'loss', 'content': 0.03590846061706543, 'timestamp': '2025-09-30 23:07:09.984164', 'step': 3977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.048517', 'step': 3977, 'epoch': 2} {'type': 'loss', 'content': 0.008356517180800438, 'timestamp': '2025-09-30 23:07:10.058072', 'step': 3978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.127867', 'step': 3978, 'epoch': 2} {'type': 'loss', 'content': 0.0005999586428515613, 'timestamp': '2025-09-30 23:07:10.130582', 'step': 3979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.194734', 'step': 3979, 'epoch': 2} {'type': 'loss', 'content': 0.021405482664704323, 'timestamp': '2025-09-30 23:07:10.207867', 'step': 3980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.286436', 'step': 3980, 'epoch': 2} {'type': 'loss', 'content': 0.014673884026706219, 'timestamp': '2025-09-30 23:07:10.292188', 'step': 3981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.363899', 'step': 3981, 'epoch': 2} {'type': 'loss', 'content': 0.016874507069587708, 'timestamp': '2025-09-30 23:07:10.367387', 'step': 3982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.431743', 'step': 3982, 'epoch': 2} {'type': 'loss', 'content': 0.048109471797943115, 'timestamp': '2025-09-30 23:07:10.434529', 'step': 3983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.510603', 'step': 3983, 'epoch': 2} {'type': 'loss', 'content': 0.007876825518906116, 'timestamp': '2025-09-30 23:07:10.521890', 'step': 3984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.599197', 'step': 3984, 'epoch': 2} {'type': 'loss', 'content': 0.0035458181519061327, 'timestamp': '2025-09-30 23:07:10.607093', 'step': 3985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:10.687701', 'step': 3985, 'epoch': 2} {'type': 'loss', 'content': 0.025387896224856377, 'timestamp': '2025-09-30 23:07:10.697395', 'step': 3986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.770042', 'step': 3986, 'epoch': 2} {'type': 'loss', 'content': 0.006835623644292355, 'timestamp': '2025-09-30 23:07:10.780186', 'step': 3987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.852888', 'step': 3987, 'epoch': 2} {'type': 'loss', 'content': 0.011748048476874828, 'timestamp': '2025-09-30 23:07:10.866139', 'step': 3988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:10.949701', 'step': 3988, 'epoch': 2} {'type': 'loss', 'content': 0.006133031100034714, 'timestamp': '2025-09-30 23:07:10.958221', 'step': 3989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:11.039908', 'step': 3989, 'epoch': 2} {'type': 'loss', 'content': 0.008618798106908798, 'timestamp': '2025-09-30 23:07:11.046057', 'step': 3990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:11.124197', 'step': 3990, 'epoch': 2} {'type': 'loss', 'content': 0.022174011915922165, 'timestamp': '2025-09-30 23:07:11.127180', 'step': 3991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:11.201701', 'step': 3991, 'epoch': 2} {'type': 'loss', 'content': 0.021821599453687668, 'timestamp': '2025-09-30 23:07:11.213940', 'step': 3992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:11.287112', 'step': 3992, 'epoch': 2} {'type': 'loss', 'content': 0.02440059371292591, 'timestamp': '2025-09-30 23:07:11.294378', 'step': 3993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:11.365882', 'step': 3993, 'epoch': 2} {'type': 'loss', 'content': 0.018527835607528687, 'timestamp': '2025-09-30 23:07:11.373559', 'step': 3994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:11.454750', 'step': 3994, 'epoch': 2} {'type': 'loss', 'content': 0.02787921391427517, 'timestamp': '2025-09-30 23:07:11.463182', 'step': 3995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:11.545722', 'step': 3995, 'epoch': 2} {'type': 'loss', 'content': 0.01078922487795353, 'timestamp': '2025-09-30 23:07:11.567870', 'step': 3996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:11.646363', 'step': 3996, 'epoch': 2} {'type': 'loss', 'content': 0.003803748870268464, 'timestamp': '2025-09-30 23:07:11.656513', 'step': 3997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:11.742858', 'step': 3997, 'epoch': 2} {'type': 'loss', 'content': 0.011781992390751839, 'timestamp': '2025-09-30 23:07:11.746272', 'step': 3998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:11.825554', 'step': 3998, 'epoch': 2} {'type': 'loss', 'content': 0.0029422480147331953, 'timestamp': '2025-09-30 23:07:11.830000', 'step': 3999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 23:07:11.935012', 'step': 3999, 'epoch': 2} {'type': 'loss', 'content': 0.00359794357791543, 'timestamp': '2025-09-30 23:07:11.945155', 'step': 4000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-30 23:07:12.404070', 'step': 4000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:12.479561', 'step': 4000, 'epoch': 2} {'type': 'loss', 'content': 0.0409599170088768, 'timestamp': '2025-09-30 23:07:12.488715', 'step': 4001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:12.570561', 'step': 4001, 'epoch': 2} {'type': 'loss', 'content': 0.008672475814819336, 'timestamp': '2025-09-30 23:07:12.585083', 'step': 4002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:12.681835', 'step': 4002, 'epoch': 2} {'type': 'loss', 'content': 0.0017328051617369056, 'timestamp': '2025-09-30 23:07:12.694188', 'step': 4003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:12.769135', 'step': 4003, 'epoch': 2} {'type': 'loss', 'content': 0.01563783921301365, 'timestamp': '2025-09-30 23:07:12.787461', 'step': 4004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:12.851790', 'step': 4004, 'epoch': 2} {'type': 'loss', 'content': 0.019800757989287376, 'timestamp': '2025-09-30 23:07:12.859836', 'step': 4005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:12.931106', 'step': 4005, 'epoch': 2} {'type': 'loss', 'content': 0.016458524391055107, 'timestamp': '2025-09-30 23:07:12.934558', 'step': 4006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.010799', 'step': 4006, 'epoch': 2} {'type': 'loss', 'content': 0.03232061490416527, 'timestamp': '2025-09-30 23:07:13.019742', 'step': 4007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.098774', 'step': 4007, 'epoch': 2} {'type': 'loss', 'content': 0.003596516326069832, 'timestamp': '2025-09-30 23:07:13.113594', 'step': 4008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:13.193307', 'step': 4008, 'epoch': 2} {'type': 'loss', 'content': 0.056124258786439896, 'timestamp': '2025-09-30 23:07:13.205142', 'step': 4009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:13.286899', 'step': 4009, 'epoch': 2} {'type': 'loss', 'content': 0.02462605945765972, 'timestamp': '2025-09-30 23:07:13.298846', 'step': 4010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.376955', 'step': 4010, 'epoch': 2} {'type': 'loss', 'content': 0.04197811335325241, 'timestamp': '2025-09-30 23:07:13.380816', 'step': 4011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.462186', 'step': 4011, 'epoch': 2} {'type': 'loss', 'content': 0.017064405605196953, 'timestamp': '2025-09-30 23:07:13.476763', 'step': 4012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.541063', 'step': 4012, 'epoch': 2} {'type': 'loss', 'content': 0.06129435449838638, 'timestamp': '2025-09-30 23:07:13.549632', 'step': 4013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:13.624801', 'step': 4013, 'epoch': 2} {'type': 'loss', 'content': 0.0016861073672771454, 'timestamp': '2025-09-30 23:07:13.634464', 'step': 4014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:13.709171', 'step': 4014, 'epoch': 2} {'type': 'loss', 'content': 0.033335257321596146, 'timestamp': '2025-09-30 23:07:13.713001', 'step': 4015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.795469', 'step': 4015, 'epoch': 2} {'type': 'loss', 'content': 0.029187705367803574, 'timestamp': '2025-09-30 23:07:13.809171', 'step': 4016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.884473', 'step': 4016, 'epoch': 2} {'type': 'loss', 'content': 0.006190315820276737, 'timestamp': '2025-09-30 23:07:13.893651', 'step': 4017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:13.958013', 'step': 4017, 'epoch': 2} {'type': 'loss', 'content': 0.07594700157642365, 'timestamp': '2025-09-30 23:07:13.968298', 'step': 4018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.037829', 'step': 4018, 'epoch': 2} {'type': 'loss', 'content': 0.0022638593800365925, 'timestamp': '2025-09-30 23:07:14.046357', 'step': 4019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.116740', 'step': 4019, 'epoch': 2} {'type': 'loss', 'content': 0.039882611483335495, 'timestamp': '2025-09-30 23:07:14.131046', 'step': 4020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.205786', 'step': 4020, 'epoch': 2} {'type': 'loss', 'content': 0.023790955543518066, 'timestamp': '2025-09-30 23:07:14.210090', 'step': 4021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:14.287387', 'step': 4021, 'epoch': 2} {'type': 'loss', 'content': 0.05328673869371414, 'timestamp': '2025-09-30 23:07:14.296220', 'step': 4022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.357519', 'step': 4022, 'epoch': 2} {'type': 'loss', 'content': 0.04097537323832512, 'timestamp': '2025-09-30 23:07:14.367765', 'step': 4023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:14.439772', 'step': 4023, 'epoch': 2} {'type': 'loss', 'content': 0.00492674857378006, 'timestamp': '2025-09-30 23:07:14.453647', 'step': 4024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.523192', 'step': 4024, 'epoch': 2} {'type': 'loss', 'content': 0.023981409147381783, 'timestamp': '2025-09-30 23:07:14.529131', 'step': 4025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.589653', 'step': 4025, 'epoch': 2} {'type': 'loss', 'content': 0.040350817143917084, 'timestamp': '2025-09-30 23:07:14.596323', 'step': 4026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.679167', 'step': 4026, 'epoch': 2} {'type': 'loss', 'content': 0.039508361369371414, 'timestamp': '2025-09-30 23:07:14.685439', 'step': 4027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.762052', 'step': 4027, 'epoch': 2} {'type': 'loss', 'content': 0.03652476891875267, 'timestamp': '2025-09-30 23:07:14.776794', 'step': 4028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.856882', 'step': 4028, 'epoch': 2} {'type': 'loss', 'content': 0.019812192767858505, 'timestamp': '2025-09-30 23:07:14.860255', 'step': 4029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:14.935567', 'step': 4029, 'epoch': 2} {'type': 'loss', 'content': 0.011007430031895638, 'timestamp': '2025-09-30 23:07:14.939839', 'step': 4030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:15.027162', 'step': 4030, 'epoch': 2} {'type': 'loss', 'content': 0.04734984040260315, 'timestamp': '2025-09-30 23:07:15.038853', 'step': 4031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.113153', 'step': 4031, 'epoch': 2} {'type': 'loss', 'content': 0.007415146101266146, 'timestamp': '2025-09-30 23:07:15.125920', 'step': 4032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.196447', 'step': 4032, 'epoch': 2} {'type': 'loss', 'content': 0.015787798911333084, 'timestamp': '2025-09-30 23:07:15.200703', 'step': 4033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.262502', 'step': 4033, 'epoch': 2} {'type': 'loss', 'content': 0.004112956579774618, 'timestamp': '2025-09-30 23:07:15.267212', 'step': 4034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:15.323663', 'step': 4034, 'epoch': 2} {'type': 'loss', 'content': 0.024285465478897095, 'timestamp': '2025-09-30 23:07:15.333285', 'step': 4035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.415935', 'step': 4035, 'epoch': 2} {'type': 'loss', 'content': 0.007423867005854845, 'timestamp': '2025-09-30 23:07:15.428786', 'step': 4036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:15.509261', 'step': 4036, 'epoch': 2} {'type': 'loss', 'content': 0.013899298384785652, 'timestamp': '2025-09-30 23:07:15.514471', 'step': 4037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.599245', 'step': 4037, 'epoch': 2} {'type': 'loss', 'content': 0.03755533695220947, 'timestamp': '2025-09-30 23:07:15.609300', 'step': 4038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.691760', 'step': 4038, 'epoch': 2} {'type': 'loss', 'content': 0.005103666801005602, 'timestamp': '2025-09-30 23:07:15.701714', 'step': 4039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.780519', 'step': 4039, 'epoch': 2} {'type': 'loss', 'content': 0.04523894190788269, 'timestamp': '2025-09-30 23:07:15.796324', 'step': 4040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.875500', 'step': 4040, 'epoch': 2} {'type': 'loss', 'content': 0.029189297929406166, 'timestamp': '2025-09-30 23:07:15.878225', 'step': 4041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:15.941135', 'step': 4041, 'epoch': 2} {'type': 'loss', 'content': 0.0022266446612775326, 'timestamp': '2025-09-30 23:07:15.949713', 'step': 4042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.029180', 'step': 4042, 'epoch': 2} {'type': 'loss', 'content': 0.012203961610794067, 'timestamp': '2025-09-30 23:07:16.032835', 'step': 4043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.097890', 'step': 4043, 'epoch': 2} {'type': 'loss', 'content': 0.0011368893319740891, 'timestamp': '2025-09-30 23:07:16.105226', 'step': 4044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.208550', 'step': 4044, 'epoch': 2} {'type': 'loss', 'content': 0.0106668621301651, 'timestamp': '2025-09-30 23:07:16.228337', 'step': 4045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:16.301762', 'step': 4045, 'epoch': 2} {'type': 'loss', 'content': 0.01045236550271511, 'timestamp': '2025-09-30 23:07:16.308712', 'step': 4046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.426910', 'step': 4046, 'epoch': 2} {'type': 'loss', 'content': 0.03630280867218971, 'timestamp': '2025-09-30 23:07:16.447296', 'step': 4047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:16.535072', 'step': 4047, 'epoch': 2} {'type': 'loss', 'content': 0.05192803591489792, 'timestamp': '2025-09-30 23:07:16.541777', 'step': 4048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.622135', 'step': 4048, 'epoch': 2} {'type': 'loss', 'content': 0.0029934155754745007, 'timestamp': '2025-09-30 23:07:16.631944', 'step': 4049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.704075', 'step': 4049, 'epoch': 2} {'type': 'loss', 'content': 0.03321794047951698, 'timestamp': '2025-09-30 23:07:16.712879', 'step': 4050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.778784', 'step': 4050, 'epoch': 2} {'type': 'loss', 'content': 0.034624163061380386, 'timestamp': '2025-09-30 23:07:16.791642', 'step': 4051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.868257', 'step': 4051, 'epoch': 2} {'type': 'loss', 'content': 0.012133851647377014, 'timestamp': '2025-09-30 23:07:16.880938', 'step': 4052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:16.946145', 'step': 4052, 'epoch': 2} {'type': 'loss', 'content': 0.02597719058394432, 'timestamp': '2025-09-30 23:07:16.953276', 'step': 4053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.019612', 'step': 4053, 'epoch': 2} {'type': 'loss', 'content': 0.05865335464477539, 'timestamp': '2025-09-30 23:07:17.026433', 'step': 4054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.089390', 'step': 4054, 'epoch': 2} {'type': 'loss', 'content': 0.015989379957318306, 'timestamp': '2025-09-30 23:07:17.096181', 'step': 4055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.158629', 'step': 4055, 'epoch': 2} {'type': 'loss', 'content': 0.012650134041905403, 'timestamp': '2025-09-30 23:07:17.171944', 'step': 4056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.241691', 'step': 4056, 'epoch': 2} {'type': 'loss', 'content': 0.0018861368298530579, 'timestamp': '2025-09-30 23:07:17.249981', 'step': 4057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.315036', 'step': 4057, 'epoch': 2} {'type': 'loss', 'content': 0.004023989662528038, 'timestamp': '2025-09-30 23:07:17.321615', 'step': 4058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:17.393492', 'step': 4058, 'epoch': 2} {'type': 'loss', 'content': 0.0389571413397789, 'timestamp': '2025-09-30 23:07:17.405117', 'step': 4059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.477480', 'step': 4059, 'epoch': 2} {'type': 'loss', 'content': 0.056770503520965576, 'timestamp': '2025-09-30 23:07:17.497620', 'step': 4060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.568886', 'step': 4060, 'epoch': 2} {'type': 'loss', 'content': 0.005665838252753019, 'timestamp': '2025-09-30 23:07:17.582749', 'step': 4061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.660078', 'step': 4061, 'epoch': 2} {'type': 'loss', 'content': 0.028831837698817253, 'timestamp': '2025-09-30 23:07:17.677399', 'step': 4062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.767826', 'step': 4062, 'epoch': 2} {'type': 'loss', 'content': 0.029242513701319695, 'timestamp': '2025-09-30 23:07:17.781922', 'step': 4063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:17.875955', 'step': 4063, 'epoch': 2} {'type': 'loss', 'content': 0.05731729045510292, 'timestamp': '2025-09-30 23:07:17.898284', 'step': 4064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:17.982520', 'step': 4064, 'epoch': 2} {'type': 'loss', 'content': 0.03239129111170769, 'timestamp': '2025-09-30 23:07:17.986852', 'step': 4065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.058176', 'step': 4065, 'epoch': 2} {'type': 'loss', 'content': 0.0027598768938332796, 'timestamp': '2025-09-30 23:07:18.068982', 'step': 4066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.144240', 'step': 4066, 'epoch': 2} {'type': 'loss', 'content': 0.003722639288753271, 'timestamp': '2025-09-30 23:07:18.150610', 'step': 4067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.222081', 'step': 4067, 'epoch': 2} {'type': 'loss', 'content': 0.033847372978925705, 'timestamp': '2025-09-30 23:07:18.237727', 'step': 4068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.300402', 'step': 4068, 'epoch': 2} {'type': 'loss', 'content': 0.004567102994769812, 'timestamp': '2025-09-30 23:07:18.304955', 'step': 4069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.372665', 'step': 4069, 'epoch': 2} {'type': 'loss', 'content': 0.02338576130568981, 'timestamp': '2025-09-30 23:07:18.382748', 'step': 4070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:18.451772', 'step': 4070, 'epoch': 2} {'type': 'loss', 'content': 0.026223791763186455, 'timestamp': '2025-09-30 23:07:18.459116', 'step': 4071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.528025', 'step': 4071, 'epoch': 2} {'type': 'loss', 'content': 0.007320567034184933, 'timestamp': '2025-09-30 23:07:18.540042', 'step': 4072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.609249', 'step': 4072, 'epoch': 2} {'type': 'loss', 'content': 0.02592506632208824, 'timestamp': '2025-09-30 23:07:18.618804', 'step': 4073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.676222', 'step': 4073, 'epoch': 2} {'type': 'loss', 'content': 0.017092788591980934, 'timestamp': '2025-09-30 23:07:18.680868', 'step': 4074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.736625', 'step': 4074, 'epoch': 2} {'type': 'loss', 'content': 0.009261764585971832, 'timestamp': '2025-09-30 23:07:18.749885', 'step': 4075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.821496', 'step': 4075, 'epoch': 2} {'type': 'loss', 'content': 0.013295321725308895, 'timestamp': '2025-09-30 23:07:18.828651', 'step': 4076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.904368', 'step': 4076, 'epoch': 2} {'type': 'loss', 'content': 0.008081094361841679, 'timestamp': '2025-09-30 23:07:18.915382', 'step': 4077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:18.997860', 'step': 4077, 'epoch': 2} {'type': 'loss', 'content': 0.020068351179361343, 'timestamp': '2025-09-30 23:07:19.012386', 'step': 4078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.085879', 'step': 4078, 'epoch': 2} {'type': 'loss', 'content': 0.043224845081567764, 'timestamp': '2025-09-30 23:07:19.093228', 'step': 4079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.163320', 'step': 4079, 'epoch': 2} {'type': 'loss', 'content': 0.023524085059762, 'timestamp': '2025-09-30 23:07:19.174846', 'step': 4080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.255071', 'step': 4080, 'epoch': 2} {'type': 'loss', 'content': 0.0068268622271716595, 'timestamp': '2025-09-30 23:07:19.272024', 'step': 4081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:19.367465', 'step': 4081, 'epoch': 2} {'type': 'loss', 'content': 0.010591375641524792, 'timestamp': '2025-09-30 23:07:19.386686', 'step': 4082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.461130', 'step': 4082, 'epoch': 2} {'type': 'loss', 'content': 0.01308509148657322, 'timestamp': '2025-09-30 23:07:19.464870', 'step': 4083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.520251', 'step': 4083, 'epoch': 2} {'type': 'loss', 'content': 0.029605252668261528, 'timestamp': '2025-09-30 23:07:19.527776', 'step': 4084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.584139', 'step': 4084, 'epoch': 2} {'type': 'loss', 'content': 0.016861848533153534, 'timestamp': '2025-09-30 23:07:19.598698', 'step': 4085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.670430', 'step': 4085, 'epoch': 2} {'type': 'loss', 'content': 0.025273606181144714, 'timestamp': '2025-09-30 23:07:19.680716', 'step': 4086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.764899', 'step': 4086, 'epoch': 2} {'type': 'loss', 'content': 0.022294221445918083, 'timestamp': '2025-09-30 23:07:19.774319', 'step': 4087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.853071', 'step': 4087, 'epoch': 2} {'type': 'loss', 'content': 0.005357757210731506, 'timestamp': '2025-09-30 23:07:19.870219', 'step': 4088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:19.956847', 'step': 4088, 'epoch': 2} {'type': 'loss', 'content': 0.042013343423604965, 'timestamp': '2025-09-30 23:07:19.972777', 'step': 4089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:20.047416', 'step': 4089, 'epoch': 2} {'type': 'loss', 'content': 0.005799402017146349, 'timestamp': '2025-09-30 23:07:20.062939', 'step': 4090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.146596', 'step': 4090, 'epoch': 2} {'type': 'loss', 'content': 0.015840791165828705, 'timestamp': '2025-09-30 23:07:20.151771', 'step': 4091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.229484', 'step': 4091, 'epoch': 2} {'type': 'loss', 'content': 0.0461241640150547, 'timestamp': '2025-09-30 23:07:20.248528', 'step': 4092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.335475', 'step': 4092, 'epoch': 2} {'type': 'loss', 'content': 0.022952597588300705, 'timestamp': '2025-09-30 23:07:20.340057', 'step': 4093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.418010', 'step': 4093, 'epoch': 2} {'type': 'loss', 'content': 0.004746586550027132, 'timestamp': '2025-09-30 23:07:20.429275', 'step': 4094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:20.493163', 'step': 4094, 'epoch': 2} {'type': 'loss', 'content': 0.019035503268241882, 'timestamp': '2025-09-30 23:07:20.506324', 'step': 4095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.582914', 'step': 4095, 'epoch': 2} {'type': 'loss', 'content': 0.03216398507356644, 'timestamp': '2025-09-30 23:07:20.598867', 'step': 4096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:20.674154', 'step': 4096, 'epoch': 2} {'type': 'loss', 'content': 0.00922800600528717, 'timestamp': '2025-09-30 23:07:20.685172', 'step': 4097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.758650', 'step': 4097, 'epoch': 2} {'type': 'loss', 'content': 0.019925037398934364, 'timestamp': '2025-09-30 23:07:20.778184', 'step': 4098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.880271', 'step': 4098, 'epoch': 2} {'type': 'loss', 'content': 0.01309859286993742, 'timestamp': '2025-09-30 23:07:20.899547', 'step': 4099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:20.971554', 'step': 4099, 'epoch': 2} {'type': 'loss', 'content': 0.019369643181562424, 'timestamp': '2025-09-30 23:07:20.991257', 'step': 4100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:21.101751', 'step': 4100, 'epoch': 2} {'type': 'loss', 'content': 0.0027365267742425203, 'timestamp': '2025-09-30 23:07:21.117217', 'step': 4101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:21.194904', 'step': 4101, 'epoch': 2} {'type': 'loss', 'content': 0.013794394209980965, 'timestamp': '2025-09-30 23:07:21.199429', 'step': 4102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:21.260096', 'step': 4102, 'epoch': 2} {'type': 'loss', 'content': 0.008059772662818432, 'timestamp': '2025-09-30 23:07:21.272835', 'step': 4103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:21.348904', 'step': 4103, 'epoch': 2} {'type': 'loss', 'content': 0.007993337698280811, 'timestamp': '2025-09-30 23:07:21.363849', 'step': 4104, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:07:26.636433', 'step': 4104, 'epoch': 2} {'type': 'pplx', 'content': 7539236.973322541, 'timestamp': '2025-09-30 23:07:26.639400', 'step': 4104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:26.691488', 'step': 4104, 'epoch': 2} {'type': 'loss', 'content': 0.0011444102274253964, 'timestamp': '2025-09-30 23:07:26.698006', 'step': 4105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:26.758792', 'step': 4105, 'epoch': 2} {'type': 'loss', 'content': 0.02785097062587738, 'timestamp': '2025-09-30 23:07:26.771729', 'step': 4106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:26.828727', 'step': 4106, 'epoch': 2} {'type': 'loss', 'content': 0.003653913736343384, 'timestamp': '2025-09-30 23:07:26.831545', 'step': 4107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:26.896601', 'step': 4107, 'epoch': 2} {'type': 'loss', 'content': 0.028615979477763176, 'timestamp': '2025-09-30 23:07:26.905443', 'step': 4108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:26.972052', 'step': 4108, 'epoch': 2} {'type': 'loss', 'content': 0.028115997090935707, 'timestamp': '2025-09-30 23:07:26.981231', 'step': 4109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.055588', 'step': 4109, 'epoch': 2} {'type': 'loss', 'content': 0.020361408591270447, 'timestamp': '2025-09-30 23:07:27.066430', 'step': 4110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:27.135259', 'step': 4110, 'epoch': 2} {'type': 'loss', 'content': 0.00674628559499979, 'timestamp': '2025-09-30 23:07:27.138446', 'step': 4111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.193100', 'step': 4111, 'epoch': 2} {'type': 'loss', 'content': 0.00818093866109848, 'timestamp': '2025-09-30 23:07:27.210855', 'step': 4112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.278076', 'step': 4112, 'epoch': 2} {'type': 'loss', 'content': 0.016644451767206192, 'timestamp': '2025-09-30 23:07:27.286506', 'step': 4113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.350832', 'step': 4113, 'epoch': 2} {'type': 'loss', 'content': 0.009168537333607674, 'timestamp': '2025-09-30 23:07:27.358064', 'step': 4114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.431554', 'step': 4114, 'epoch': 2} {'type': 'loss', 'content': 0.034183915704488754, 'timestamp': '2025-09-30 23:07:27.435326', 'step': 4115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:27.503405', 'step': 4115, 'epoch': 2} {'type': 'loss', 'content': 0.02496476098895073, 'timestamp': '2025-09-30 23:07:27.509692', 'step': 4116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.576107', 'step': 4116, 'epoch': 2} {'type': 'loss', 'content': 0.023150144144892693, 'timestamp': '2025-09-30 23:07:27.584720', 'step': 4117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:27.655231', 'step': 4117, 'epoch': 2} {'type': 'loss', 'content': 0.005433883052319288, 'timestamp': '2025-09-30 23:07:27.665051', 'step': 4118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.738667', 'step': 4118, 'epoch': 2} {'type': 'loss', 'content': 0.01195587683469057, 'timestamp': '2025-09-30 23:07:27.741651', 'step': 4119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.808905', 'step': 4119, 'epoch': 2} {'type': 'loss', 'content': 0.00927023496478796, 'timestamp': '2025-09-30 23:07:27.822381', 'step': 4120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:27.892330', 'step': 4120, 'epoch': 2} {'type': 'loss', 'content': 0.016805393621325493, 'timestamp': '2025-09-30 23:07:27.907239', 'step': 4121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:28.008887', 'step': 4121, 'epoch': 2} {'type': 'loss', 'content': 0.0008718087919987738, 'timestamp': '2025-09-30 23:07:28.024913', 'step': 4122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:28.121618', 'step': 4122, 'epoch': 2} {'type': 'loss', 'content': 0.005953627172857523, 'timestamp': '2025-09-30 23:07:28.140648', 'step': 4123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:28.224875', 'step': 4123, 'epoch': 2} {'type': 'loss', 'content': 0.016847502440214157, 'timestamp': '2025-09-30 23:07:28.239029', 'step': 4124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:28.314625', 'step': 4124, 'epoch': 2} {'type': 'loss', 'content': 0.016924146562814713, 'timestamp': '2025-09-30 23:07:28.322331', 'step': 4125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:28.393745', 'step': 4125, 'epoch': 2} {'type': 'loss', 'content': 0.004800934344530106, 'timestamp': '2025-09-30 23:07:28.404540', 'step': 4126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:28.475674', 'step': 4126, 'epoch': 2} {'type': 'loss', 'content': 0.0094490647315979, 'timestamp': '2025-09-30 23:07:28.484386', 'step': 4127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:28.553893', 'step': 4127, 'epoch': 2} {'type': 'loss', 'content': 0.05003475397825241, 'timestamp': '2025-09-30 23:07:28.568611', 'step': 4128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:28.648624', 'step': 4128, 'epoch': 2} {'type': 'loss', 'content': 0.004320107400417328, 'timestamp': '2025-09-30 23:07:28.650865', 'step': 4129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:28.713680', 'step': 4129, 'epoch': 2} {'type': 'loss', 'content': 0.0014334309380501509, 'timestamp': '2025-09-30 23:07:28.719430', 'step': 4130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:28.789546', 'step': 4130, 'epoch': 2} {'type': 'loss', 'content': 0.018478896468877792, 'timestamp': '2025-09-30 23:07:28.797559', 'step': 4131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:28.870906', 'step': 4131, 'epoch': 2} {'type': 'loss', 'content': 0.004059901461005211, 'timestamp': '2025-09-30 23:07:28.890659', 'step': 4132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:28.985975', 'step': 4132, 'epoch': 2} {'type': 'loss', 'content': 0.004942330066114664, 'timestamp': '2025-09-30 23:07:29.005473', 'step': 4133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.099964', 'step': 4133, 'epoch': 2} {'type': 'loss', 'content': 0.0257264431566, 'timestamp': '2025-09-30 23:07:29.113000', 'step': 4134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.177925', 'step': 4134, 'epoch': 2} {'type': 'loss', 'content': 0.006339813116937876, 'timestamp': '2025-09-30 23:07:29.182374', 'step': 4135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.250603', 'step': 4135, 'epoch': 2} {'type': 'loss', 'content': 0.02653515711426735, 'timestamp': '2025-09-30 23:07:29.258565', 'step': 4136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.330035', 'step': 4136, 'epoch': 2} {'type': 'loss', 'content': 0.003969985526055098, 'timestamp': '2025-09-30 23:07:29.340837', 'step': 4137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.412091', 'step': 4137, 'epoch': 2} {'type': 'loss', 'content': 0.0008052248158492148, 'timestamp': '2025-09-30 23:07:29.420330', 'step': 4138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.497662', 'step': 4138, 'epoch': 2} {'type': 'loss', 'content': 0.061172183603048325, 'timestamp': '2025-09-30 23:07:29.501697', 'step': 4139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.568341', 'step': 4139, 'epoch': 2} {'type': 'loss', 'content': 0.004480606876313686, 'timestamp': '2025-09-30 23:07:29.590619', 'step': 4140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.683596', 'step': 4140, 'epoch': 2} {'type': 'loss', 'content': 0.01018641609698534, 'timestamp': '2025-09-30 23:07:29.700540', 'step': 4141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.793928', 'step': 4141, 'epoch': 2} {'type': 'loss', 'content': 0.027147933840751648, 'timestamp': '2025-09-30 23:07:29.810952', 'step': 4142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:29.897110', 'step': 4142, 'epoch': 2} {'type': 'loss', 'content': 0.014267928898334503, 'timestamp': '2025-09-30 23:07:29.908098', 'step': 4143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:29.988061', 'step': 4143, 'epoch': 2} {'type': 'loss', 'content': 0.0012251383159309626, 'timestamp': '2025-09-30 23:07:29.999422', 'step': 4144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.071692', 'step': 4144, 'epoch': 2} {'type': 'loss', 'content': 0.0019638279918581247, 'timestamp': '2025-09-30 23:07:30.080988', 'step': 4145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:30.151334', 'step': 4145, 'epoch': 2} {'type': 'loss', 'content': 0.001954760169610381, 'timestamp': '2025-09-30 23:07:30.155860', 'step': 4146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:30.229328', 'step': 4146, 'epoch': 2} {'type': 'loss', 'content': 0.05154450610280037, 'timestamp': '2025-09-30 23:07:30.233250', 'step': 4147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:30.323192', 'step': 4147, 'epoch': 2} {'type': 'loss', 'content': 0.0008055016514845192, 'timestamp': '2025-09-30 23:07:30.334963', 'step': 4148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:30.390037', 'step': 4148, 'epoch': 2} {'type': 'loss', 'content': 0.0077471355907619, 'timestamp': '2025-09-30 23:07:30.399324', 'step': 4149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.466857', 'step': 4149, 'epoch': 2} {'type': 'loss', 'content': 0.020394761115312576, 'timestamp': '2025-09-30 23:07:30.470666', 'step': 4150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.538761', 'step': 4150, 'epoch': 2} {'type': 'loss', 'content': 0.0131386024877429, 'timestamp': '2025-09-30 23:07:30.542224', 'step': 4151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.610902', 'step': 4151, 'epoch': 2} {'type': 'loss', 'content': 0.0005720873596146703, 'timestamp': '2025-09-30 23:07:30.618310', 'step': 4152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:30.685713', 'step': 4152, 'epoch': 2} {'type': 'loss', 'content': 0.0035576762165874243, 'timestamp': '2025-09-30 23:07:30.694728', 'step': 4153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.764890', 'step': 4153, 'epoch': 2} {'type': 'loss', 'content': 0.013222530484199524, 'timestamp': '2025-09-30 23:07:30.768677', 'step': 4154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.834558', 'step': 4154, 'epoch': 2} {'type': 'loss', 'content': 0.034260641783475876, 'timestamp': '2025-09-30 23:07:30.838697', 'step': 4155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.908442', 'step': 4155, 'epoch': 2} {'type': 'loss', 'content': 0.05432259663939476, 'timestamp': '2025-09-30 23:07:30.921976', 'step': 4156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:30.990490', 'step': 4156, 'epoch': 2} {'type': 'loss', 'content': 0.005082692019641399, 'timestamp': '2025-09-30 23:07:31.009060', 'step': 4157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:31.090296', 'step': 4157, 'epoch': 2} {'type': 'loss', 'content': 0.015499631874263287, 'timestamp': '2025-09-30 23:07:31.094028', 'step': 4158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:31.156366', 'step': 4158, 'epoch': 2} {'type': 'loss', 'content': 0.04838041588664055, 'timestamp': '2025-09-30 23:07:31.164046', 'step': 4159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:31.236744', 'step': 4159, 'epoch': 2} {'type': 'loss', 'content': 0.013616623356938362, 'timestamp': '2025-09-30 23:07:31.244437', 'step': 4160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:31.316881', 'step': 4160, 'epoch': 2} {'type': 'loss', 'content': 0.004834159277379513, 'timestamp': '2025-09-30 23:07:31.327558', 'step': 4161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:31.400848', 'step': 4161, 'epoch': 2} {'type': 'loss', 'content': 0.033732131123542786, 'timestamp': '2025-09-30 23:07:31.410209', 'step': 4162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:31.480615', 'step': 4162, 'epoch': 2} {'type': 'loss', 'content': 0.004427330568432808, 'timestamp': '2025-09-30 23:07:31.490062', 'step': 4163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:31.564410', 'step': 4163, 'epoch': 2} {'type': 'loss', 'content': 0.039271075278520584, 'timestamp': '2025-09-30 23:07:31.577842', 'step': 4164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:31.636243', 'step': 4164, 'epoch': 2} {'type': 'loss', 'content': 0.006171087268739939, 'timestamp': '2025-09-30 23:07:31.644878', 'step': 4165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:31.713486', 'step': 4165, 'epoch': 2} {'type': 'loss', 'content': 0.02908201701939106, 'timestamp': '2025-09-30 23:07:31.716893', 'step': 4166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:31.781408', 'step': 4166, 'epoch': 2} {'type': 'loss', 'content': 0.01023829448968172, 'timestamp': '2025-09-30 23:07:31.791995', 'step': 4167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:31.854464', 'step': 4167, 'epoch': 2} {'type': 'loss', 'content': 0.030785445123910904, 'timestamp': '2025-09-30 23:07:31.869260', 'step': 4168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:31.939537', 'step': 4168, 'epoch': 2} {'type': 'loss', 'content': 0.0015312430914491415, 'timestamp': '2025-09-30 23:07:31.943364', 'step': 4169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:32.018972', 'step': 4169, 'epoch': 2} {'type': 'loss', 'content': 0.0009078431758098304, 'timestamp': '2025-09-30 23:07:32.027688', 'step': 4170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.103064', 'step': 4170, 'epoch': 2} {'type': 'loss', 'content': 0.02867763675749302, 'timestamp': '2025-09-30 23:07:32.113556', 'step': 4171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.186617', 'step': 4171, 'epoch': 2} {'type': 'loss', 'content': 0.05286724492907524, 'timestamp': '2025-09-30 23:07:32.199114', 'step': 4172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:32.276096', 'step': 4172, 'epoch': 2} {'type': 'loss', 'content': 0.024549344554543495, 'timestamp': '2025-09-30 23:07:32.287970', 'step': 4173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.361646', 'step': 4173, 'epoch': 2} {'type': 'loss', 'content': 0.043882910162210464, 'timestamp': '2025-09-30 23:07:32.372942', 'step': 4174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.445514', 'step': 4174, 'epoch': 2} {'type': 'loss', 'content': 0.00741939153522253, 'timestamp': '2025-09-30 23:07:32.448874', 'step': 4175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.519960', 'step': 4175, 'epoch': 2} {'type': 'loss', 'content': 0.0012333624763414264, 'timestamp': '2025-09-30 23:07:32.527217', 'step': 4176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:32.592544', 'step': 4176, 'epoch': 2} {'type': 'loss', 'content': 0.04195311665534973, 'timestamp': '2025-09-30 23:07:32.602986', 'step': 4177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.664415', 'step': 4177, 'epoch': 2} {'type': 'loss', 'content': 0.001302183954976499, 'timestamp': '2025-09-30 23:07:32.675385', 'step': 4178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.750446', 'step': 4178, 'epoch': 2} {'type': 'loss', 'content': 0.003599452320486307, 'timestamp': '2025-09-30 23:07:32.756852', 'step': 4179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.837840', 'step': 4179, 'epoch': 2} {'type': 'loss', 'content': 0.022747179493308067, 'timestamp': '2025-09-30 23:07:32.848243', 'step': 4180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.911595', 'step': 4180, 'epoch': 2} {'type': 'loss', 'content': 0.000852916797157377, 'timestamp': '2025-09-30 23:07:32.915394', 'step': 4181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:32.984545', 'step': 4181, 'epoch': 2} {'type': 'loss', 'content': 0.0010339220752939582, 'timestamp': '2025-09-30 23:07:32.994644', 'step': 4182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.059703', 'step': 4182, 'epoch': 2} {'type': 'loss', 'content': 0.03592618182301521, 'timestamp': '2025-09-30 23:07:33.065319', 'step': 4183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.135847', 'step': 4183, 'epoch': 2} {'type': 'loss', 'content': 0.022056078538298607, 'timestamp': '2025-09-30 23:07:33.142983', 'step': 4184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.200367', 'step': 4184, 'epoch': 2} {'type': 'loss', 'content': 0.00364786759018898, 'timestamp': '2025-09-30 23:07:33.205686', 'step': 4185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.268788', 'step': 4185, 'epoch': 2} {'type': 'loss', 'content': 0.006984360050410032, 'timestamp': '2025-09-30 23:07:33.272807', 'step': 4186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.330748', 'step': 4186, 'epoch': 2} {'type': 'loss', 'content': 0.006079851649701595, 'timestamp': '2025-09-30 23:07:33.345098', 'step': 4187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:33.414715', 'step': 4187, 'epoch': 2} {'type': 'loss', 'content': 0.028210248798131943, 'timestamp': '2025-09-30 23:07:33.425957', 'step': 4188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.488795', 'step': 4188, 'epoch': 2} {'type': 'loss', 'content': 0.017003729939460754, 'timestamp': '2025-09-30 23:07:33.495543', 'step': 4189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.554578', 'step': 4189, 'epoch': 2} {'type': 'loss', 'content': 0.016461437568068504, 'timestamp': '2025-09-30 23:07:33.569484', 'step': 4190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.636231', 'step': 4190, 'epoch': 2} {'type': 'loss', 'content': 0.03066178970038891, 'timestamp': '2025-09-30 23:07:33.640976', 'step': 4191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.711043', 'step': 4191, 'epoch': 2} {'type': 'loss', 'content': 0.002124955179169774, 'timestamp': '2025-09-30 23:07:33.722323', 'step': 4192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.790565', 'step': 4192, 'epoch': 2} {'type': 'loss', 'content': 0.016885345801711082, 'timestamp': '2025-09-30 23:07:33.797849', 'step': 4193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:33.868325', 'step': 4193, 'epoch': 2} {'type': 'loss', 'content': 0.013174655847251415, 'timestamp': '2025-09-30 23:07:33.883609', 'step': 4194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:33.947121', 'step': 4194, 'epoch': 2} {'type': 'loss', 'content': 0.012976682744920254, 'timestamp': '2025-09-30 23:07:33.951820', 'step': 4195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.026204', 'step': 4195, 'epoch': 2} {'type': 'loss', 'content': 0.003566535422578454, 'timestamp': '2025-09-30 23:07:34.039459', 'step': 4196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.097377', 'step': 4196, 'epoch': 2} {'type': 'loss', 'content': 0.023492859676480293, 'timestamp': '2025-09-30 23:07:34.102890', 'step': 4197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.182154', 'step': 4197, 'epoch': 2} {'type': 'loss', 'content': 0.009725473821163177, 'timestamp': '2025-09-30 23:07:34.196889', 'step': 4198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.276261', 'step': 4198, 'epoch': 2} {'type': 'loss', 'content': 0.00822226982563734, 'timestamp': '2025-09-30 23:07:34.286735', 'step': 4199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.364357', 'step': 4199, 'epoch': 2} {'type': 'loss', 'content': 0.01713266782462597, 'timestamp': '2025-09-30 23:07:34.375937', 'step': 4200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.434202', 'step': 4200, 'epoch': 2} {'type': 'loss', 'content': 0.021302057430148125, 'timestamp': '2025-09-30 23:07:34.446918', 'step': 4201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:34.522922', 'step': 4201, 'epoch': 2} {'type': 'loss', 'content': 0.008296391926705837, 'timestamp': '2025-09-30 23:07:34.531044', 'step': 4202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.615737', 'step': 4202, 'epoch': 2} {'type': 'loss', 'content': 0.001163557986728847, 'timestamp': '2025-09-30 23:07:34.627502', 'step': 4203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.688334', 'step': 4203, 'epoch': 2} {'type': 'loss', 'content': 0.007352711167186499, 'timestamp': '2025-09-30 23:07:34.700733', 'step': 4204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.768982', 'step': 4204, 'epoch': 2} {'type': 'loss', 'content': 0.00853121466934681, 'timestamp': '2025-09-30 23:07:34.778368', 'step': 4205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.854152', 'step': 4205, 'epoch': 2} {'type': 'loss', 'content': 0.026820117607712746, 'timestamp': '2025-09-30 23:07:34.864798', 'step': 4206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:34.929964', 'step': 4206, 'epoch': 2} {'type': 'loss', 'content': 0.0293929111212492, 'timestamp': '2025-09-30 23:07:34.942145', 'step': 4207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.028899', 'step': 4207, 'epoch': 2} {'type': 'loss', 'content': 0.016333244740962982, 'timestamp': '2025-09-30 23:07:35.038309', 'step': 4208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.097124', 'step': 4208, 'epoch': 2} {'type': 'loss', 'content': 0.017565781250596046, 'timestamp': '2025-09-30 23:07:35.107239', 'step': 4209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.182198', 'step': 4209, 'epoch': 2} {'type': 'loss', 'content': 0.03163254261016846, 'timestamp': '2025-09-30 23:07:35.186747', 'step': 4210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.249506', 'step': 4210, 'epoch': 2} {'type': 'loss', 'content': 0.011343742720782757, 'timestamp': '2025-09-30 23:07:35.264369', 'step': 4211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:35.326241', 'step': 4211, 'epoch': 2} {'type': 'loss', 'content': 0.0028166172560304403, 'timestamp': '2025-09-30 23:07:35.340386', 'step': 4212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.422086', 'step': 4212, 'epoch': 2} {'type': 'loss', 'content': 0.0040354421362280846, 'timestamp': '2025-09-30 23:07:35.431307', 'step': 4213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.497741', 'step': 4213, 'epoch': 2} {'type': 'loss', 'content': 0.0032602474093437195, 'timestamp': '2025-09-30 23:07:35.513713', 'step': 4214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.596121', 'step': 4214, 'epoch': 2} {'type': 'loss', 'content': 0.010218463838100433, 'timestamp': '2025-09-30 23:07:35.608649', 'step': 4215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.697729', 'step': 4215, 'epoch': 2} {'type': 'loss', 'content': 0.02730351686477661, 'timestamp': '2025-09-30 23:07:35.714292', 'step': 4216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.803048', 'step': 4216, 'epoch': 2} {'type': 'loss', 'content': 0.0019538558553904295, 'timestamp': '2025-09-30 23:07:35.807531', 'step': 4217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:35.897761', 'step': 4217, 'epoch': 2} {'type': 'loss', 'content': 0.019800299778580666, 'timestamp': '2025-09-30 23:07:35.905867', 'step': 4218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:35.997690', 'step': 4218, 'epoch': 2} {'type': 'loss', 'content': 0.008637088350951672, 'timestamp': '2025-09-30 23:07:36.002111', 'step': 4219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:36.083249', 'step': 4219, 'epoch': 2} {'type': 'loss', 'content': 0.004707512445747852, 'timestamp': '2025-09-30 23:07:36.090351', 'step': 4220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:36.151937', 'step': 4220, 'epoch': 2} {'type': 'loss', 'content': 0.0047027310356497765, 'timestamp': '2025-09-30 23:07:36.159895', 'step': 4221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:36.228657', 'step': 4221, 'epoch': 2} {'type': 'loss', 'content': 0.009086506441235542, 'timestamp': '2025-09-30 23:07:36.244678', 'step': 4222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:36.339341', 'step': 4222, 'epoch': 2} {'type': 'loss', 'content': 0.0007788329385221004, 'timestamp': '2025-09-30 23:07:36.358819', 'step': 4223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:36.461508', 'step': 4223, 'epoch': 2} {'type': 'loss', 'content': 0.00189084536395967, 'timestamp': '2025-09-30 23:07:36.485259', 'step': 4224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:36.584710', 'step': 4224, 'epoch': 2} {'type': 'loss', 'content': 0.0038386587984859943, 'timestamp': '2025-09-30 23:07:36.599171', 'step': 4225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:36.678413', 'step': 4225, 'epoch': 2} {'type': 'loss', 'content': 0.006688238121569157, 'timestamp': '2025-09-30 23:07:36.686507', 'step': 4226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:36.748803', 'step': 4226, 'epoch': 2} {'type': 'loss', 'content': 0.0032463117968291044, 'timestamp': '2025-09-30 23:07:36.759215', 'step': 4227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:36.830292', 'step': 4227, 'epoch': 2} {'type': 'loss', 'content': 0.002433664631098509, 'timestamp': '2025-09-30 23:07:36.839641', 'step': 4228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:36.909698', 'step': 4228, 'epoch': 2} {'type': 'loss', 'content': 0.0009718074579723179, 'timestamp': '2025-09-30 23:07:36.913529', 'step': 4229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:36.969519', 'step': 4229, 'epoch': 2} {'type': 'loss', 'content': 0.019025061279535294, 'timestamp': '2025-09-30 23:07:36.973473', 'step': 4230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.028744', 'step': 4230, 'epoch': 2} {'type': 'loss', 'content': 0.0018155405996367335, 'timestamp': '2025-09-30 23:07:37.032447', 'step': 4231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.088375', 'step': 4231, 'epoch': 2} {'type': 'loss', 'content': 0.0014307277742773294, 'timestamp': '2025-09-30 23:07:37.096181', 'step': 4232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.152516', 'step': 4232, 'epoch': 2} {'type': 'loss', 'content': 0.03406163677573204, 'timestamp': '2025-09-30 23:07:37.156771', 'step': 4233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.215837', 'step': 4233, 'epoch': 2} {'type': 'loss', 'content': 0.001903115538880229, 'timestamp': '2025-09-30 23:07:37.220526', 'step': 4234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:37.284054', 'step': 4234, 'epoch': 2} {'type': 'loss', 'content': 0.042291317135095596, 'timestamp': '2025-09-30 23:07:37.289138', 'step': 4235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.348904', 'step': 4235, 'epoch': 2} {'type': 'loss', 'content': 0.006858620326966047, 'timestamp': '2025-09-30 23:07:37.358873', 'step': 4236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.416963', 'step': 4236, 'epoch': 2} {'type': 'loss', 'content': 0.0036978486459702253, 'timestamp': '2025-09-30 23:07:37.421906', 'step': 4237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.475234', 'step': 4237, 'epoch': 2} {'type': 'loss', 'content': 0.0027565457858145237, 'timestamp': '2025-09-30 23:07:37.479638', 'step': 4238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.537689', 'step': 4238, 'epoch': 2} {'type': 'loss', 'content': 0.008794407360255718, 'timestamp': '2025-09-30 23:07:37.542153', 'step': 4239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:37.598583', 'step': 4239, 'epoch': 2} {'type': 'loss', 'content': 0.014177436009049416, 'timestamp': '2025-09-30 23:07:37.607586', 'step': 4240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:37.674058', 'step': 4240, 'epoch': 2} {'type': 'loss', 'content': 0.003364336444064975, 'timestamp': '2025-09-30 23:07:37.685578', 'step': 4241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.764228', 'step': 4241, 'epoch': 2} {'type': 'loss', 'content': 0.009128655306994915, 'timestamp': '2025-09-30 23:07:37.767739', 'step': 4242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.831552', 'step': 4242, 'epoch': 2} {'type': 'loss', 'content': 0.0012411895440891385, 'timestamp': '2025-09-30 23:07:37.839446', 'step': 4243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.908471', 'step': 4243, 'epoch': 2} {'type': 'loss', 'content': 0.05128537490963936, 'timestamp': '2025-09-30 23:07:37.919685', 'step': 4244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:37.990150', 'step': 4244, 'epoch': 2} {'type': 'loss', 'content': 0.0020407948177307844, 'timestamp': '2025-09-30 23:07:38.001046', 'step': 4245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.079865', 'step': 4245, 'epoch': 2} {'type': 'loss', 'content': 0.0003233674797229469, 'timestamp': '2025-09-30 23:07:38.089823', 'step': 4246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.147927', 'step': 4246, 'epoch': 2} {'type': 'loss', 'content': 0.03391070291399956, 'timestamp': '2025-09-30 23:07:38.156977', 'step': 4247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.217541', 'step': 4247, 'epoch': 2} {'type': 'loss', 'content': 0.01759704016149044, 'timestamp': '2025-09-30 23:07:38.224719', 'step': 4248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.296169', 'step': 4248, 'epoch': 2} {'type': 'loss', 'content': 0.010222271084785461, 'timestamp': '2025-09-30 23:07:38.306328', 'step': 4249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.369388', 'step': 4249, 'epoch': 2} {'type': 'loss', 'content': 0.006354053039103746, 'timestamp': '2025-09-30 23:07:38.377091', 'step': 4250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.441805', 'step': 4250, 'epoch': 2} {'type': 'loss', 'content': 0.049049049615859985, 'timestamp': '2025-09-30 23:07:38.448979', 'step': 4251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.519790', 'step': 4251, 'epoch': 2} {'type': 'loss', 'content': 0.04637922719120979, 'timestamp': '2025-09-30 23:07:38.533202', 'step': 4252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.593570', 'step': 4252, 'epoch': 2} {'type': 'loss', 'content': 0.0033733753953129053, 'timestamp': '2025-09-30 23:07:38.603159', 'step': 4253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:38.672262', 'step': 4253, 'epoch': 2} {'type': 'loss', 'content': 0.007434910628944635, 'timestamp': '2025-09-30 23:07:38.680038', 'step': 4254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:38.754194', 'step': 4254, 'epoch': 2} {'type': 'loss', 'content': 0.0005607242928817868, 'timestamp': '2025-09-30 23:07:38.764152', 'step': 4255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:38.840555', 'step': 4255, 'epoch': 2} {'type': 'loss', 'content': 0.0071380361914634705, 'timestamp': '2025-09-30 23:07:38.853389', 'step': 4256, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:07:42.848698', 'step': 4256, 'epoch': 2} {'type': 'pplx', 'content': 8311564.2124466365, 'timestamp': '2025-09-30 23:07:42.865167', 'step': 4256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:42.916290', 'step': 4256, 'epoch': 2} {'type': 'loss', 'content': 0.026711061596870422, 'timestamp': '2025-09-30 23:07:42.919208', 'step': 4257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:42.973768', 'step': 4257, 'epoch': 2} {'type': 'loss', 'content': 0.0016888827085494995, 'timestamp': '2025-09-30 23:07:42.977544', 'step': 4258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.033299', 'step': 4258, 'epoch': 2} {'type': 'loss', 'content': 0.00042227699304930866, 'timestamp': '2025-09-30 23:07:43.040043', 'step': 4259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:43.097597', 'step': 4259, 'epoch': 2} {'type': 'loss', 'content': 0.009673057124018669, 'timestamp': '2025-09-30 23:07:43.104042', 'step': 4260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:43.160590', 'step': 4260, 'epoch': 2} {'type': 'loss', 'content': 0.035120073705911636, 'timestamp': '2025-09-30 23:07:43.163948', 'step': 4261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.218876', 'step': 4261, 'epoch': 2} {'type': 'loss', 'content': 0.004591960925608873, 'timestamp': '2025-09-30 23:07:43.222074', 'step': 4262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.277719', 'step': 4262, 'epoch': 2} {'type': 'loss', 'content': 0.0007369258673861623, 'timestamp': '2025-09-30 23:07:43.280830', 'step': 4263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.335224', 'step': 4263, 'epoch': 2} {'type': 'loss', 'content': 0.0004816911823581904, 'timestamp': '2025-09-30 23:07:43.355107', 'step': 4264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.413766', 'step': 4264, 'epoch': 2} {'type': 'loss', 'content': 0.0862608551979065, 'timestamp': '2025-09-30 23:07:43.416600', 'step': 4265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.473696', 'step': 4265, 'epoch': 2} {'type': 'loss', 'content': 0.05248735100030899, 'timestamp': '2025-09-30 23:07:43.477978', 'step': 4266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.536081', 'step': 4266, 'epoch': 2} {'type': 'loss', 'content': 0.012972704134881496, 'timestamp': '2025-09-30 23:07:43.540057', 'step': 4267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.597221', 'step': 4267, 'epoch': 2} {'type': 'loss', 'content': 0.01705009676516056, 'timestamp': '2025-09-30 23:07:43.603665', 'step': 4268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.657659', 'step': 4268, 'epoch': 2} {'type': 'loss', 'content': 0.03107399307191372, 'timestamp': '2025-09-30 23:07:43.661685', 'step': 4269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.719097', 'step': 4269, 'epoch': 2} {'type': 'loss', 'content': 0.030767615884542465, 'timestamp': '2025-09-30 23:07:43.723135', 'step': 4270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.781764', 'step': 4270, 'epoch': 2} {'type': 'loss', 'content': 0.0012071524979546666, 'timestamp': '2025-09-30 23:07:43.785991', 'step': 4271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.844292', 'step': 4271, 'epoch': 2} {'type': 'loss', 'content': 0.003630770603194833, 'timestamp': '2025-09-30 23:07:43.851383', 'step': 4272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.908307', 'step': 4272, 'epoch': 2} {'type': 'loss', 'content': 0.03990786895155907, 'timestamp': '2025-09-30 23:07:43.911849', 'step': 4273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:43.965950', 'step': 4273, 'epoch': 2} {'type': 'loss', 'content': 0.04519200697541237, 'timestamp': '2025-09-30 23:07:43.968247', 'step': 4274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.024026', 'step': 4274, 'epoch': 2} {'type': 'loss', 'content': 0.10382968932390213, 'timestamp': '2025-09-30 23:07:44.030861', 'step': 4275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:44.095087', 'step': 4275, 'epoch': 2} {'type': 'loss', 'content': 0.003238184144720435, 'timestamp': '2025-09-30 23:07:44.105471', 'step': 4276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.167606', 'step': 4276, 'epoch': 2} {'type': 'loss', 'content': 0.012325153686106205, 'timestamp': '2025-09-30 23:07:44.171849', 'step': 4277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:44.228543', 'step': 4277, 'epoch': 2} {'type': 'loss', 'content': 0.01873672381043434, 'timestamp': '2025-09-30 23:07:44.232561', 'step': 4278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:44.290333', 'step': 4278, 'epoch': 2} {'type': 'loss', 'content': 0.0007993836770765483, 'timestamp': '2025-09-30 23:07:44.295568', 'step': 4279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:44.353260', 'step': 4279, 'epoch': 2} {'type': 'loss', 'content': 0.037050556391477585, 'timestamp': '2025-09-30 23:07:44.359870', 'step': 4280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.416185', 'step': 4280, 'epoch': 2} {'type': 'loss', 'content': 0.013298794627189636, 'timestamp': '2025-09-30 23:07:44.420232', 'step': 4281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.478058', 'step': 4281, 'epoch': 2} {'type': 'loss', 'content': 0.001688774093054235, 'timestamp': '2025-09-30 23:07:44.482234', 'step': 4282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.540293', 'step': 4282, 'epoch': 2} {'type': 'loss', 'content': 0.06550759822130203, 'timestamp': '2025-09-30 23:07:44.544342', 'step': 4283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.599844', 'step': 4283, 'epoch': 2} {'type': 'loss', 'content': 0.02754664048552513, 'timestamp': '2025-09-30 23:07:44.606612', 'step': 4284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:44.661505', 'step': 4284, 'epoch': 2} {'type': 'loss', 'content': 0.011802450753748417, 'timestamp': '2025-09-30 23:07:44.665056', 'step': 4285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:44.721450', 'step': 4285, 'epoch': 2} {'type': 'loss', 'content': 0.009375357069075108, 'timestamp': '2025-09-30 23:07:44.729909', 'step': 4286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.786095', 'step': 4286, 'epoch': 2} {'type': 'loss', 'content': 0.030748650431632996, 'timestamp': '2025-09-30 23:07:44.789855', 'step': 4287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.845158', 'step': 4287, 'epoch': 2} {'type': 'loss', 'content': 0.01280558854341507, 'timestamp': '2025-09-30 23:07:44.851550', 'step': 4288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.905786', 'step': 4288, 'epoch': 2} {'type': 'loss', 'content': 0.002050126437097788, 'timestamp': '2025-09-30 23:07:44.909604', 'step': 4289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:44.964526', 'step': 4289, 'epoch': 2} {'type': 'loss', 'content': 0.017372025176882744, 'timestamp': '2025-09-30 23:07:44.967266', 'step': 4290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.023443', 'step': 4290, 'epoch': 2} {'type': 'loss', 'content': 0.013162736780941486, 'timestamp': '2025-09-30 23:07:45.028347', 'step': 4291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.087420', 'step': 4291, 'epoch': 2} {'type': 'loss', 'content': 0.03055013157427311, 'timestamp': '2025-09-30 23:07:45.095726', 'step': 4292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.153520', 'step': 4292, 'epoch': 2} {'type': 'loss', 'content': 0.006567643489688635, 'timestamp': '2025-09-30 23:07:45.161826', 'step': 4293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.219178', 'step': 4293, 'epoch': 2} {'type': 'loss', 'content': 0.0005931461928412318, 'timestamp': '2025-09-30 23:07:45.222377', 'step': 4294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.279748', 'step': 4294, 'epoch': 2} {'type': 'loss', 'content': 0.03367257118225098, 'timestamp': '2025-09-30 23:07:45.282335', 'step': 4295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.338364', 'step': 4295, 'epoch': 2} {'type': 'loss', 'content': 0.006593532860279083, 'timestamp': '2025-09-30 23:07:45.348968', 'step': 4296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.406221', 'step': 4296, 'epoch': 2} {'type': 'loss', 'content': 0.04105732962489128, 'timestamp': '2025-09-30 23:07:45.409784', 'step': 4297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.463634', 'step': 4297, 'epoch': 2} {'type': 'loss', 'content': 0.019829172641038895, 'timestamp': '2025-09-30 23:07:45.466504', 'step': 4298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.520768', 'step': 4298, 'epoch': 2} {'type': 'loss', 'content': 0.04422520846128464, 'timestamp': '2025-09-30 23:07:45.523859', 'step': 4299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.577893', 'step': 4299, 'epoch': 2} {'type': 'loss', 'content': 0.010901504196226597, 'timestamp': '2025-09-30 23:07:45.583936', 'step': 4300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.636989', 'step': 4300, 'epoch': 2} {'type': 'loss', 'content': 0.04055407643318176, 'timestamp': '2025-09-30 23:07:45.639797', 'step': 4301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.693741', 'step': 4301, 'epoch': 2} {'type': 'loss', 'content': 0.014609337784349918, 'timestamp': '2025-09-30 23:07:45.696546', 'step': 4302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:45.750230', 'step': 4302, 'epoch': 2} {'type': 'loss', 'content': 0.0038738527800887823, 'timestamp': '2025-09-30 23:07:45.752599', 'step': 4303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.806809', 'step': 4303, 'epoch': 2} {'type': 'loss', 'content': 0.01887189783155918, 'timestamp': '2025-09-30 23:07:45.813248', 'step': 4304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.866118', 'step': 4304, 'epoch': 2} {'type': 'loss', 'content': 0.006082228384912014, 'timestamp': '2025-09-30 23:07:45.869777', 'step': 4305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.924678', 'step': 4305, 'epoch': 2} {'type': 'loss', 'content': 0.020818356424570084, 'timestamp': '2025-09-30 23:07:45.927607', 'step': 4306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:45.981365', 'step': 4306, 'epoch': 2} {'type': 'loss', 'content': 0.0015423723962157965, 'timestamp': '2025-09-30 23:07:45.984270', 'step': 4307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:46.038489', 'step': 4307, 'epoch': 2} {'type': 'loss', 'content': 0.03222086280584335, 'timestamp': '2025-09-30 23:07:46.046130', 'step': 4308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:46.103259', 'step': 4308, 'epoch': 2} {'type': 'loss', 'content': 0.013895651325583458, 'timestamp': '2025-09-30 23:07:46.106040', 'step': 4309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.164361', 'step': 4309, 'epoch': 2} {'type': 'loss', 'content': 0.018487172201275826, 'timestamp': '2025-09-30 23:07:46.168835', 'step': 4310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:46.225737', 'step': 4310, 'epoch': 2} {'type': 'loss', 'content': 0.013556904159486294, 'timestamp': '2025-09-30 23:07:46.232557', 'step': 4311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:46.287780', 'step': 4311, 'epoch': 2} {'type': 'loss', 'content': 0.013828583993017673, 'timestamp': '2025-09-30 23:07:46.293572', 'step': 4312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.345905', 'step': 4312, 'epoch': 2} {'type': 'loss', 'content': 0.007761569228023291, 'timestamp': '2025-09-30 23:07:46.349483', 'step': 4313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.402255', 'step': 4313, 'epoch': 2} {'type': 'loss', 'content': 0.03838960453867912, 'timestamp': '2025-09-30 23:07:46.405005', 'step': 4314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:46.459603', 'step': 4314, 'epoch': 2} {'type': 'loss', 'content': 0.00601929659023881, 'timestamp': '2025-09-30 23:07:46.462065', 'step': 4315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.518499', 'step': 4315, 'epoch': 2} {'type': 'loss', 'content': 0.016180751845240593, 'timestamp': '2025-09-30 23:07:46.524680', 'step': 4316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.578371', 'step': 4316, 'epoch': 2} {'type': 'loss', 'content': 0.016779465600848198, 'timestamp': '2025-09-30 23:07:46.581068', 'step': 4317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.634254', 'step': 4317, 'epoch': 2} {'type': 'loss', 'content': 0.002194012049585581, 'timestamp': '2025-09-30 23:07:46.636779', 'step': 4318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.691204', 'step': 4318, 'epoch': 2} {'type': 'loss', 'content': 0.007938019931316376, 'timestamp': '2025-09-30 23:07:46.693511', 'step': 4319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.746253', 'step': 4319, 'epoch': 2} {'type': 'loss', 'content': 0.002614737721160054, 'timestamp': '2025-09-30 23:07:46.751901', 'step': 4320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.804392', 'step': 4320, 'epoch': 2} {'type': 'loss', 'content': 0.028850719332695007, 'timestamp': '2025-09-30 23:07:46.806891', 'step': 4321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.859525', 'step': 4321, 'epoch': 2} {'type': 'loss', 'content': 0.00529822614043951, 'timestamp': '2025-09-30 23:07:46.862085', 'step': 4322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:46.914920', 'step': 4322, 'epoch': 2} {'type': 'loss', 'content': 0.04187892749905586, 'timestamp': '2025-09-30 23:07:46.917483', 'step': 4323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:46.970465', 'step': 4323, 'epoch': 2} {'type': 'loss', 'content': 0.014818847179412842, 'timestamp': '2025-09-30 23:07:46.976316', 'step': 4324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.029053', 'step': 4324, 'epoch': 2} {'type': 'loss', 'content': 0.036990176886320114, 'timestamp': '2025-09-30 23:07:47.033022', 'step': 4325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.091359', 'step': 4325, 'epoch': 2} {'type': 'loss', 'content': 0.005365932825952768, 'timestamp': '2025-09-30 23:07:47.096629', 'step': 4326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:47.150140', 'step': 4326, 'epoch': 2} {'type': 'loss', 'content': 0.020383253693580627, 'timestamp': '2025-09-30 23:07:47.152527', 'step': 4327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.206114', 'step': 4327, 'epoch': 2} {'type': 'loss', 'content': 0.017321160063147545, 'timestamp': '2025-09-30 23:07:47.211797', 'step': 4328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.263844', 'step': 4328, 'epoch': 2} {'type': 'loss', 'content': 0.008721926249563694, 'timestamp': '2025-09-30 23:07:47.266288', 'step': 4329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.319205', 'step': 4329, 'epoch': 2} {'type': 'loss', 'content': 0.014529094099998474, 'timestamp': '2025-09-30 23:07:47.321725', 'step': 4330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.374288', 'step': 4330, 'epoch': 2} {'type': 'loss', 'content': 0.0012320497771725059, 'timestamp': '2025-09-30 23:07:47.376628', 'step': 4331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.428997', 'step': 4331, 'epoch': 2} {'type': 'loss', 'content': 0.014829914085566998, 'timestamp': '2025-09-30 23:07:47.435240', 'step': 4332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.487239', 'step': 4332, 'epoch': 2} {'type': 'loss', 'content': 0.010711157694458961, 'timestamp': '2025-09-30 23:07:47.489420', 'step': 4333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.545339', 'step': 4333, 'epoch': 2} {'type': 'loss', 'content': 0.008892950601875782, 'timestamp': '2025-09-30 23:07:47.547546', 'step': 4334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:47.600231', 'step': 4334, 'epoch': 2} {'type': 'loss', 'content': 0.01532528642565012, 'timestamp': '2025-09-30 23:07:47.602997', 'step': 4335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.656368', 'step': 4335, 'epoch': 2} {'type': 'loss', 'content': 0.011762561276555061, 'timestamp': '2025-09-30 23:07:47.662441', 'step': 4336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.714945', 'step': 4336, 'epoch': 2} {'type': 'loss', 'content': 0.01760215498507023, 'timestamp': '2025-09-30 23:07:47.717619', 'step': 4337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.770410', 'step': 4337, 'epoch': 2} {'type': 'loss', 'content': 0.01766350492835045, 'timestamp': '2025-09-30 23:07:47.772470', 'step': 4338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.825146', 'step': 4338, 'epoch': 2} {'type': 'loss', 'content': 0.006872680503875017, 'timestamp': '2025-09-30 23:07:47.828079', 'step': 4339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:47.882515', 'step': 4339, 'epoch': 2} {'type': 'loss', 'content': 0.006947095040231943, 'timestamp': '2025-09-30 23:07:47.889937', 'step': 4340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:47.941529', 'step': 4340, 'epoch': 2} {'type': 'loss', 'content': 0.01530208345502615, 'timestamp': '2025-09-30 23:07:47.944164', 'step': 4341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:47.996496', 'step': 4341, 'epoch': 2} {'type': 'loss', 'content': 0.02023976854979992, 'timestamp': '2025-09-30 23:07:47.998667', 'step': 4342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.052435', 'step': 4342, 'epoch': 2} {'type': 'loss', 'content': 0.004522609990090132, 'timestamp': '2025-09-30 23:07:48.055539', 'step': 4343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.112642', 'step': 4343, 'epoch': 2} {'type': 'loss', 'content': 0.01290243212133646, 'timestamp': '2025-09-30 23:07:48.119299', 'step': 4344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:48.170371', 'step': 4344, 'epoch': 2} {'type': 'loss', 'content': 0.02076055482029915, 'timestamp': '2025-09-30 23:07:48.172501', 'step': 4345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:48.224250', 'step': 4345, 'epoch': 2} {'type': 'loss', 'content': 0.01826591230928898, 'timestamp': '2025-09-30 23:07:48.226611', 'step': 4346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.278772', 'step': 4346, 'epoch': 2} {'type': 'loss', 'content': 0.00864083506166935, 'timestamp': '2025-09-30 23:07:48.281073', 'step': 4347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.334339', 'step': 4347, 'epoch': 2} {'type': 'loss', 'content': 0.0022649799939244986, 'timestamp': '2025-09-30 23:07:48.340246', 'step': 4348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.391885', 'step': 4348, 'epoch': 2} {'type': 'loss', 'content': 0.02495071478188038, 'timestamp': '2025-09-30 23:07:48.394325', 'step': 4349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.446982', 'step': 4349, 'epoch': 2} {'type': 'loss', 'content': 0.00258767232298851, 'timestamp': '2025-09-30 23:07:48.449384', 'step': 4350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.502830', 'step': 4350, 'epoch': 2} {'type': 'loss', 'content': 0.0011007784632965922, 'timestamp': '2025-09-30 23:07:48.505378', 'step': 4351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.558701', 'step': 4351, 'epoch': 2} {'type': 'loss', 'content': 0.02693404071033001, 'timestamp': '2025-09-30 23:07:48.564624', 'step': 4352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.620384', 'step': 4352, 'epoch': 2} {'type': 'loss', 'content': 0.004940875340253115, 'timestamp': '2025-09-30 23:07:48.622957', 'step': 4353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:48.688898', 'step': 4353, 'epoch': 2} {'type': 'loss', 'content': 0.0024497020058333874, 'timestamp': '2025-09-30 23:07:48.691208', 'step': 4354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.743767', 'step': 4354, 'epoch': 2} {'type': 'loss', 'content': 0.0085167670622468, 'timestamp': '2025-09-30 23:07:48.746255', 'step': 4355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.798909', 'step': 4355, 'epoch': 2} {'type': 'loss', 'content': 0.007689155638217926, 'timestamp': '2025-09-30 23:07:48.805778', 'step': 4356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.863397', 'step': 4356, 'epoch': 2} {'type': 'loss', 'content': 0.034478019922971725, 'timestamp': '2025-09-30 23:07:48.867593', 'step': 4357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:48.923203', 'step': 4357, 'epoch': 2} {'type': 'loss', 'content': 0.01039151567965746, 'timestamp': '2025-09-30 23:07:48.925765', 'step': 4358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:48.980497', 'step': 4358, 'epoch': 2} {'type': 'loss', 'content': 0.003904540091753006, 'timestamp': '2025-09-30 23:07:48.982823', 'step': 4359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.035372', 'step': 4359, 'epoch': 2} {'type': 'loss', 'content': 0.03978709131479263, 'timestamp': '2025-09-30 23:07:49.042812', 'step': 4360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.096798', 'step': 4360, 'epoch': 2} {'type': 'loss', 'content': 0.000728671788237989, 'timestamp': '2025-09-30 23:07:49.099998', 'step': 4361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.152435', 'step': 4361, 'epoch': 2} {'type': 'loss', 'content': 0.0007030735723674297, 'timestamp': '2025-09-30 23:07:49.154787', 'step': 4362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.207294', 'step': 4362, 'epoch': 2} {'type': 'loss', 'content': 0.004300267901271582, 'timestamp': '2025-09-30 23:07:49.209660', 'step': 4363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.261807', 'step': 4363, 'epoch': 2} {'type': 'loss', 'content': 0.03509706258773804, 'timestamp': '2025-09-30 23:07:49.267517', 'step': 4364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:49.319550', 'step': 4364, 'epoch': 2} {'type': 'loss', 'content': 0.02824283018708229, 'timestamp': '2025-09-30 23:07:49.322783', 'step': 4365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.375437', 'step': 4365, 'epoch': 2} {'type': 'loss', 'content': 0.04304266348481178, 'timestamp': '2025-09-30 23:07:49.378036', 'step': 4366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.430841', 'step': 4366, 'epoch': 2} {'type': 'loss', 'content': 0.009615554474294186, 'timestamp': '2025-09-30 23:07:49.433221', 'step': 4367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.485638', 'step': 4367, 'epoch': 2} {'type': 'loss', 'content': 0.012103646993637085, 'timestamp': '2025-09-30 23:07:49.491759', 'step': 4368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.544157', 'step': 4368, 'epoch': 2} {'type': 'loss', 'content': 0.000352265516994521, 'timestamp': '2025-09-30 23:07:49.547103', 'step': 4369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.600299', 'step': 4369, 'epoch': 2} {'type': 'loss', 'content': 0.007120079360902309, 'timestamp': '2025-09-30 23:07:49.602893', 'step': 4370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.656807', 'step': 4370, 'epoch': 2} {'type': 'loss', 'content': 0.013685859739780426, 'timestamp': '2025-09-30 23:07:49.659320', 'step': 4371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.711774', 'step': 4371, 'epoch': 2} {'type': 'loss', 'content': 0.06446556001901627, 'timestamp': '2025-09-30 23:07:49.718806', 'step': 4372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.773539', 'step': 4372, 'epoch': 2} {'type': 'loss', 'content': 0.009280616417527199, 'timestamp': '2025-09-30 23:07:49.775965', 'step': 4373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.827695', 'step': 4373, 'epoch': 2} {'type': 'loss', 'content': 0.005759372375905514, 'timestamp': '2025-09-30 23:07:49.830107', 'step': 4374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.882838', 'step': 4374, 'epoch': 2} {'type': 'loss', 'content': 0.032795194536447525, 'timestamp': '2025-09-30 23:07:49.885283', 'step': 4375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:49.937299', 'step': 4375, 'epoch': 2} {'type': 'loss', 'content': 0.03303978219628334, 'timestamp': '2025-09-30 23:07:49.943273', 'step': 4376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:49.994812', 'step': 4376, 'epoch': 2} {'type': 'loss', 'content': 0.00044555935892276466, 'timestamp': '2025-09-30 23:07:49.997115', 'step': 4377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.049156', 'step': 4377, 'epoch': 2} {'type': 'loss', 'content': 0.04048677533864975, 'timestamp': '2025-09-30 23:07:50.051502', 'step': 4378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.108013', 'step': 4378, 'epoch': 2} {'type': 'loss', 'content': 0.057797547429800034, 'timestamp': '2025-09-30 23:07:50.111472', 'step': 4379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.166317', 'step': 4379, 'epoch': 2} {'type': 'loss', 'content': 0.0021805826108902693, 'timestamp': '2025-09-30 23:07:50.172471', 'step': 4380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.223855', 'step': 4380, 'epoch': 2} {'type': 'loss', 'content': 0.030609574168920517, 'timestamp': '2025-09-30 23:07:50.226078', 'step': 4381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.278712', 'step': 4381, 'epoch': 2} {'type': 'loss', 'content': 0.002181033603847027, 'timestamp': '2025-09-30 23:07:50.280897', 'step': 4382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.332922', 'step': 4382, 'epoch': 2} {'type': 'loss', 'content': 0.0007584589184261858, 'timestamp': '2025-09-30 23:07:50.335952', 'step': 4383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.388022', 'step': 4383, 'epoch': 2} {'type': 'loss', 'content': 0.009872698225080967, 'timestamp': '2025-09-30 23:07:50.393706', 'step': 4384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.444991', 'step': 4384, 'epoch': 2} {'type': 'loss', 'content': 0.0026361707132309675, 'timestamp': '2025-09-30 23:07:50.447289', 'step': 4385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.499321', 'step': 4385, 'epoch': 2} {'type': 'loss', 'content': 0.007897342555224895, 'timestamp': '2025-09-30 23:07:50.501904', 'step': 4386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:50.554734', 'step': 4386, 'epoch': 2} {'type': 'loss', 'content': 0.039503250271081924, 'timestamp': '2025-09-30 23:07:50.556865', 'step': 4387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.608539', 'step': 4387, 'epoch': 2} {'type': 'loss', 'content': 0.007525707595050335, 'timestamp': '2025-09-30 23:07:50.614156', 'step': 4388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.666021', 'step': 4388, 'epoch': 2} {'type': 'loss', 'content': 0.0012861188733950257, 'timestamp': '2025-09-30 23:07:50.668601', 'step': 4389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.721252', 'step': 4389, 'epoch': 2} {'type': 'loss', 'content': 0.007486972026526928, 'timestamp': '2025-09-30 23:07:50.723736', 'step': 4390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:50.776029', 'step': 4390, 'epoch': 2} {'type': 'loss', 'content': 0.06929724663496017, 'timestamp': '2025-09-30 23:07:50.778265', 'step': 4391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.829937', 'step': 4391, 'epoch': 2} {'type': 'loss', 'content': 0.05351630225777626, 'timestamp': '2025-09-30 23:07:50.835921', 'step': 4392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.888742', 'step': 4392, 'epoch': 2} {'type': 'loss', 'content': 0.010513772256672382, 'timestamp': '2025-09-30 23:07:50.891341', 'step': 4393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:50.943259', 'step': 4393, 'epoch': 2} {'type': 'loss', 'content': 0.050234969705343246, 'timestamp': '2025-09-30 23:07:50.946311', 'step': 4394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:50.999123', 'step': 4394, 'epoch': 2} {'type': 'loss', 'content': 0.000278273451840505, 'timestamp': '2025-09-30 23:07:51.001968', 'step': 4395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.057254', 'step': 4395, 'epoch': 2} {'type': 'loss', 'content': 0.06193198636174202, 'timestamp': '2025-09-30 23:07:51.063844', 'step': 4396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.116817', 'step': 4396, 'epoch': 2} {'type': 'loss', 'content': 0.021114246919751167, 'timestamp': '2025-09-30 23:07:51.120366', 'step': 4397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.174694', 'step': 4397, 'epoch': 2} {'type': 'loss', 'content': 0.01201788429170847, 'timestamp': '2025-09-30 23:07:51.177157', 'step': 4398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.229122', 'step': 4398, 'epoch': 2} {'type': 'loss', 'content': 0.005902249366044998, 'timestamp': '2025-09-30 23:07:51.231645', 'step': 4399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:51.283646', 'step': 4399, 'epoch': 2} {'type': 'loss', 'content': 0.005075323395431042, 'timestamp': '2025-09-30 23:07:51.289918', 'step': 4400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.345174', 'step': 4400, 'epoch': 2} {'type': 'loss', 'content': 0.0026780483312904835, 'timestamp': '2025-09-30 23:07:51.348254', 'step': 4401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.400898', 'step': 4401, 'epoch': 2} {'type': 'loss', 'content': 0.03715026378631592, 'timestamp': '2025-09-30 23:07:51.408037', 'step': 4402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.460788', 'step': 4402, 'epoch': 2} {'type': 'loss', 'content': 0.03485891595482826, 'timestamp': '2025-09-30 23:07:51.463145', 'step': 4403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.518449', 'step': 4403, 'epoch': 2} {'type': 'loss', 'content': 0.0026692489627748728, 'timestamp': '2025-09-30 23:07:51.524394', 'step': 4404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.584841', 'step': 4404, 'epoch': 2} {'type': 'loss', 'content': 0.017803067341446877, 'timestamp': '2025-09-30 23:07:51.587330', 'step': 4405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.639661', 'step': 4405, 'epoch': 2} {'type': 'loss', 'content': 0.03089076280593872, 'timestamp': '2025-09-30 23:07:51.642175', 'step': 4406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.695526', 'step': 4406, 'epoch': 2} {'type': 'loss', 'content': 0.025283794850111008, 'timestamp': '2025-09-30 23:07:51.700316', 'step': 4407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:51.756677', 'step': 4407, 'epoch': 2} {'type': 'loss', 'content': 0.012457109056413174, 'timestamp': '2025-09-30 23:07:51.764411', 'step': 4408, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:07:55.738868', 'step': 4408, 'epoch': 2} {'type': 'pplx', 'content': 5796947.302618793, 'timestamp': '2025-09-30 23:07:55.743062', 'step': 4408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:55.797880', 'step': 4408, 'epoch': 2} {'type': 'loss', 'content': 0.00821553822606802, 'timestamp': '2025-09-30 23:07:55.800553', 'step': 4409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:55.858442', 'step': 4409, 'epoch': 2} {'type': 'loss', 'content': 0.012818023562431335, 'timestamp': '2025-09-30 23:07:55.861113', 'step': 4410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:55.916551', 'step': 4410, 'epoch': 2} {'type': 'loss', 'content': 0.037889134138822556, 'timestamp': '2025-09-30 23:07:55.920072', 'step': 4411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:55.978265', 'step': 4411, 'epoch': 2} {'type': 'loss', 'content': 0.03182809427380562, 'timestamp': '2025-09-30 23:07:55.984230', 'step': 4412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:56.038150', 'step': 4412, 'epoch': 2} {'type': 'loss', 'content': 0.037114568054676056, 'timestamp': '2025-09-30 23:07:56.043754', 'step': 4413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.101837', 'step': 4413, 'epoch': 2} {'type': 'loss', 'content': 0.02472391352057457, 'timestamp': '2025-09-30 23:07:56.108666', 'step': 4414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.172544', 'step': 4414, 'epoch': 2} {'type': 'loss', 'content': 0.04790797084569931, 'timestamp': '2025-09-30 23:07:56.179234', 'step': 4415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.238859', 'step': 4415, 'epoch': 2} {'type': 'loss', 'content': 0.0009199038031511009, 'timestamp': '2025-09-30 23:07:56.247487', 'step': 4416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:56.304291', 'step': 4416, 'epoch': 2} {'type': 'loss', 'content': 0.028957495465874672, 'timestamp': '2025-09-30 23:07:56.307385', 'step': 4417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:56.363279', 'step': 4417, 'epoch': 2} {'type': 'loss', 'content': 0.03962855413556099, 'timestamp': '2025-09-30 23:07:56.366950', 'step': 4418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.423603', 'step': 4418, 'epoch': 2} {'type': 'loss', 'content': 0.010031270794570446, 'timestamp': '2025-09-30 23:07:56.426676', 'step': 4419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.483248', 'step': 4419, 'epoch': 2} {'type': 'loss', 'content': 0.00633360305801034, 'timestamp': '2025-09-30 23:07:56.490181', 'step': 4420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.545046', 'step': 4420, 'epoch': 2} {'type': 'loss', 'content': 0.0043663266114890575, 'timestamp': '2025-09-30 23:07:56.548249', 'step': 4421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.606258', 'step': 4421, 'epoch': 2} {'type': 'loss', 'content': 0.035582561045885086, 'timestamp': '2025-09-30 23:07:56.609496', 'step': 4422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.663805', 'step': 4422, 'epoch': 2} {'type': 'loss', 'content': 0.042418014258146286, 'timestamp': '2025-09-30 23:07:56.666477', 'step': 4423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.720363', 'step': 4423, 'epoch': 2} {'type': 'loss', 'content': 0.019048364832997322, 'timestamp': '2025-09-30 23:07:56.726658', 'step': 4424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.781631', 'step': 4424, 'epoch': 2} {'type': 'loss', 'content': 0.010721084661781788, 'timestamp': '2025-09-30 23:07:56.785349', 'step': 4425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.841841', 'step': 4425, 'epoch': 2} {'type': 'loss', 'content': 0.030820706859230995, 'timestamp': '2025-09-30 23:07:56.845891', 'step': 4426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:56.900239', 'step': 4426, 'epoch': 2} {'type': 'loss', 'content': 0.011109941639006138, 'timestamp': '2025-09-30 23:07:56.903554', 'step': 4427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:56.958835', 'step': 4427, 'epoch': 2} {'type': 'loss', 'content': 0.0031683403067290783, 'timestamp': '2025-09-30 23:07:56.969137', 'step': 4428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.023031', 'step': 4428, 'epoch': 2} {'type': 'loss', 'content': 0.0018705095862969756, 'timestamp': '2025-09-30 23:07:57.026328', 'step': 4429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.081076', 'step': 4429, 'epoch': 2} {'type': 'loss', 'content': 0.020790671929717064, 'timestamp': '2025-09-30 23:07:57.083160', 'step': 4430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.138334', 'step': 4430, 'epoch': 2} {'type': 'loss', 'content': 0.002013225806877017, 'timestamp': '2025-09-30 23:07:57.142291', 'step': 4431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.200006', 'step': 4431, 'epoch': 2} {'type': 'loss', 'content': 0.011018635705113411, 'timestamp': '2025-09-30 23:07:57.208149', 'step': 4432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.264319', 'step': 4432, 'epoch': 2} {'type': 'loss', 'content': 0.03027925454080105, 'timestamp': '2025-09-30 23:07:57.267371', 'step': 4433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:57.327215', 'step': 4433, 'epoch': 2} {'type': 'loss', 'content': 0.001995191676542163, 'timestamp': '2025-09-30 23:07:57.331820', 'step': 4434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.387071', 'step': 4434, 'epoch': 2} {'type': 'loss', 'content': 0.022532593458890915, 'timestamp': '2025-09-30 23:07:57.390096', 'step': 4435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.445784', 'step': 4435, 'epoch': 2} {'type': 'loss', 'content': 0.018485264852643013, 'timestamp': '2025-09-30 23:07:57.451983', 'step': 4436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.507210', 'step': 4436, 'epoch': 2} {'type': 'loss', 'content': 0.027721550315618515, 'timestamp': '2025-09-30 23:07:57.509881', 'step': 4437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:57.563214', 'step': 4437, 'epoch': 2} {'type': 'loss', 'content': 0.017207728698849678, 'timestamp': '2025-09-30 23:07:57.566292', 'step': 4438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.621329', 'step': 4438, 'epoch': 2} {'type': 'loss', 'content': 0.02357405610382557, 'timestamp': '2025-09-30 23:07:57.623991', 'step': 4439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.678438', 'step': 4439, 'epoch': 2} {'type': 'loss', 'content': 0.0584232322871685, 'timestamp': '2025-09-30 23:07:57.685105', 'step': 4440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:57.738921', 'step': 4440, 'epoch': 2} {'type': 'loss', 'content': 0.03206290677189827, 'timestamp': '2025-09-30 23:07:57.742773', 'step': 4441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.798144', 'step': 4441, 'epoch': 2} {'type': 'loss', 'content': 0.02272523008286953, 'timestamp': '2025-09-30 23:07:57.805206', 'step': 4442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:07:57.867387', 'step': 4442, 'epoch': 2} {'type': 'loss', 'content': 0.014826701954007149, 'timestamp': '2025-09-30 23:07:57.870158', 'step': 4443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:57.937057', 'step': 4443, 'epoch': 2} {'type': 'loss', 'content': 0.01077108085155487, 'timestamp': '2025-09-30 23:07:57.947450', 'step': 4444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.009281', 'step': 4444, 'epoch': 2} {'type': 'loss', 'content': 0.012835188768804073, 'timestamp': '2025-09-30 23:07:58.012356', 'step': 4445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:58.069218', 'step': 4445, 'epoch': 2} {'type': 'loss', 'content': 0.018687184900045395, 'timestamp': '2025-09-30 23:07:58.071919', 'step': 4446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:58.125714', 'step': 4446, 'epoch': 2} {'type': 'loss', 'content': 0.014967729337513447, 'timestamp': '2025-09-30 23:07:58.130189', 'step': 4447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.191283', 'step': 4447, 'epoch': 2} {'type': 'loss', 'content': 0.014904571697115898, 'timestamp': '2025-09-30 23:07:58.200289', 'step': 4448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:58.258417', 'step': 4448, 'epoch': 2} {'type': 'loss', 'content': 0.013094902038574219, 'timestamp': '2025-09-30 23:07:58.261642', 'step': 4449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:07:58.315482', 'step': 4449, 'epoch': 2} {'type': 'loss', 'content': 0.033463820815086365, 'timestamp': '2025-09-30 23:07:58.319521', 'step': 4450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.374046', 'step': 4450, 'epoch': 2} {'type': 'loss', 'content': 0.0011005798587575555, 'timestamp': '2025-09-30 23:07:58.379816', 'step': 4451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.435581', 'step': 4451, 'epoch': 2} {'type': 'loss', 'content': 0.020983127877116203, 'timestamp': '2025-09-30 23:07:58.441065', 'step': 4452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.495104', 'step': 4452, 'epoch': 2} {'type': 'loss', 'content': 0.010270762257277966, 'timestamp': '2025-09-30 23:07:58.496767', 'step': 4453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.548485', 'step': 4453, 'epoch': 2} {'type': 'loss', 'content': 0.005312859546393156, 'timestamp': '2025-09-30 23:07:58.551275', 'step': 4454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.603091', 'step': 4454, 'epoch': 2} {'type': 'loss', 'content': 0.010556933470070362, 'timestamp': '2025-09-30 23:07:58.605284', 'step': 4455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.661618', 'step': 4455, 'epoch': 2} {'type': 'loss', 'content': 0.02017931453883648, 'timestamp': '2025-09-30 23:07:58.669290', 'step': 4456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.720584', 'step': 4456, 'epoch': 2} {'type': 'loss', 'content': 0.020022645592689514, 'timestamp': '2025-09-30 23:07:58.722817', 'step': 4457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.774722', 'step': 4457, 'epoch': 2} {'type': 'loss', 'content': 0.018354246392846107, 'timestamp': '2025-09-30 23:07:58.777308', 'step': 4458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.829839', 'step': 4458, 'epoch': 2} {'type': 'loss', 'content': 0.02055193856358528, 'timestamp': '2025-09-30 23:07:58.832097', 'step': 4459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.884556', 'step': 4459, 'epoch': 2} {'type': 'loss', 'content': 0.030344469472765923, 'timestamp': '2025-09-30 23:07:58.890425', 'step': 4460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.942481', 'step': 4460, 'epoch': 2} {'type': 'loss', 'content': 0.005861715413630009, 'timestamp': '2025-09-30 23:07:58.944744', 'step': 4461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:58.996316', 'step': 4461, 'epoch': 2} {'type': 'loss', 'content': 0.0034327073954045773, 'timestamp': '2025-09-30 23:07:58.997877', 'step': 4462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.049015', 'step': 4462, 'epoch': 2} {'type': 'loss', 'content': 0.008305445313453674, 'timestamp': '2025-09-30 23:07:59.051435', 'step': 4463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.104651', 'step': 4463, 'epoch': 2} {'type': 'loss', 'content': 0.01861044578254223, 'timestamp': '2025-09-30 23:07:59.110447', 'step': 4464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.161252', 'step': 4464, 'epoch': 2} {'type': 'loss', 'content': 0.015593205578625202, 'timestamp': '2025-09-30 23:07:59.164005', 'step': 4465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.218554', 'step': 4465, 'epoch': 2} {'type': 'loss', 'content': 0.013071276247501373, 'timestamp': '2025-09-30 23:07:59.220780', 'step': 4466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.272616', 'step': 4466, 'epoch': 2} {'type': 'loss', 'content': 0.046122729778289795, 'timestamp': '2025-09-30 23:07:59.274834', 'step': 4467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:59.326329', 'step': 4467, 'epoch': 2} {'type': 'loss', 'content': 0.02228761650621891, 'timestamp': '2025-09-30 23:07:59.331470', 'step': 4468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.382715', 'step': 4468, 'epoch': 2} {'type': 'loss', 'content': 0.014806305058300495, 'timestamp': '2025-09-30 23:07:59.384625', 'step': 4469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.436422', 'step': 4469, 'epoch': 2} {'type': 'loss', 'content': 0.011446401476860046, 'timestamp': '2025-09-30 23:07:59.438409', 'step': 4470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.493269', 'step': 4470, 'epoch': 2} {'type': 'loss', 'content': 0.01977379620075226, 'timestamp': '2025-09-30 23:07:59.495440', 'step': 4471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.547151', 'step': 4471, 'epoch': 2} {'type': 'loss', 'content': 0.016684509813785553, 'timestamp': '2025-09-30 23:07:59.552861', 'step': 4472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.605855', 'step': 4472, 'epoch': 2} {'type': 'loss', 'content': 0.005467127542942762, 'timestamp': '2025-09-30 23:07:59.608317', 'step': 4473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:59.660729', 'step': 4473, 'epoch': 2} {'type': 'loss', 'content': 0.00530041242018342, 'timestamp': '2025-09-30 23:07:59.663370', 'step': 4474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.715952', 'step': 4474, 'epoch': 2} {'type': 'loss', 'content': 0.004615843296051025, 'timestamp': '2025-09-30 23:07:59.718295', 'step': 4475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.770729', 'step': 4475, 'epoch': 2} {'type': 'loss', 'content': 0.0033941830042749643, 'timestamp': '2025-09-30 23:07:59.775889', 'step': 4476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:59.827112', 'step': 4476, 'epoch': 2} {'type': 'loss', 'content': 0.026812901720404625, 'timestamp': '2025-09-30 23:07:59.829143', 'step': 4477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.881871', 'step': 4477, 'epoch': 2} {'type': 'loss', 'content': 0.022130627185106277, 'timestamp': '2025-09-30 23:07:59.883531', 'step': 4478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:07:59.935251', 'step': 4478, 'epoch': 2} {'type': 'loss', 'content': 0.033584531396627426, 'timestamp': '2025-09-30 23:07:59.938025', 'step': 4479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:07:59.990558', 'step': 4479, 'epoch': 2} {'type': 'loss', 'content': 0.0022738080006092787, 'timestamp': '2025-09-30 23:07:59.996507', 'step': 4480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.048481', 'step': 4480, 'epoch': 2} {'type': 'loss', 'content': 0.015777284279465675, 'timestamp': '2025-09-30 23:08:00.051234', 'step': 4481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.104437', 'step': 4481, 'epoch': 2} {'type': 'loss', 'content': 0.015647858381271362, 'timestamp': '2025-09-30 23:08:00.106595', 'step': 4482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.159069', 'step': 4482, 'epoch': 2} {'type': 'loss', 'content': 0.0018917527049779892, 'timestamp': '2025-09-30 23:08:00.162368', 'step': 4483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.218775', 'step': 4483, 'epoch': 2} {'type': 'loss', 'content': 0.013581800274550915, 'timestamp': '2025-09-30 23:08:00.224368', 'step': 4484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.276402', 'step': 4484, 'epoch': 2} {'type': 'loss', 'content': 0.01364681962877512, 'timestamp': '2025-09-30 23:08:00.278223', 'step': 4485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.330316', 'step': 4485, 'epoch': 2} {'type': 'loss', 'content': 0.009690439328551292, 'timestamp': '2025-09-30 23:08:00.332326', 'step': 4486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.385300', 'step': 4486, 'epoch': 2} {'type': 'loss', 'content': 0.01040333416312933, 'timestamp': '2025-09-30 23:08:00.387340', 'step': 4487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.442657', 'step': 4487, 'epoch': 2} {'type': 'loss', 'content': 0.004489883314818144, 'timestamp': '2025-09-30 23:08:00.448616', 'step': 4488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:00.501156', 'step': 4488, 'epoch': 2} {'type': 'loss', 'content': 0.01633695885539055, 'timestamp': '2025-09-30 23:08:00.503599', 'step': 4489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:00.556203', 'step': 4489, 'epoch': 2} {'type': 'loss', 'content': 0.0065319230780005455, 'timestamp': '2025-09-30 23:08:00.558768', 'step': 4490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:00.611640', 'step': 4490, 'epoch': 2} {'type': 'loss', 'content': 0.001111609861254692, 'timestamp': '2025-09-30 23:08:00.614057', 'step': 4491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.666877', 'step': 4491, 'epoch': 2} {'type': 'loss', 'content': 0.0009215558529831469, 'timestamp': '2025-09-30 23:08:00.672722', 'step': 4492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:00.728113', 'step': 4492, 'epoch': 2} {'type': 'loss', 'content': 0.015471288003027439, 'timestamp': '2025-09-30 23:08:00.730303', 'step': 4493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.785884', 'step': 4493, 'epoch': 2} {'type': 'loss', 'content': 0.007153279613703489, 'timestamp': '2025-09-30 23:08:00.787632', 'step': 4494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:00.840443', 'step': 4494, 'epoch': 2} {'type': 'loss', 'content': 0.00081715575652197, 'timestamp': '2025-09-30 23:08:00.843181', 'step': 4495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:00.901218', 'step': 4495, 'epoch': 2} {'type': 'loss', 'content': 0.003705214476212859, 'timestamp': '2025-09-30 23:08:00.907847', 'step': 4496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:00.962159', 'step': 4496, 'epoch': 2} {'type': 'loss', 'content': 0.0027392276097089052, 'timestamp': '2025-09-30 23:08:00.967275', 'step': 4497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:01.028138', 'step': 4497, 'epoch': 2} {'type': 'loss', 'content': 0.0052529312670230865, 'timestamp': '2025-09-30 23:08:01.030964', 'step': 4498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:01.090721', 'step': 4498, 'epoch': 2} {'type': 'loss', 'content': 0.010528819635510445, 'timestamp': '2025-09-30 23:08:01.093255', 'step': 4499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:01.147661', 'step': 4499, 'epoch': 2} {'type': 'loss', 'content': 0.03753996267914772, 'timestamp': '2025-09-30 23:08:01.156384', 'step': 4500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-30 23:08:01.734329', 'step': 4500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:01.792496', 'step': 4500, 'epoch': 2} {'type': 'loss', 'content': 0.0024805169086903334, 'timestamp': '2025-09-30 23:08:01.799753', 'step': 4501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:01.860573', 'step': 4501, 'epoch': 2} {'type': 'loss', 'content': 0.010822320356965065, 'timestamp': '2025-09-30 23:08:01.863607', 'step': 4502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:01.917867', 'step': 4502, 'epoch': 2} {'type': 'loss', 'content': 0.003681757254526019, 'timestamp': '2025-09-30 23:08:01.920865', 'step': 4503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:01.973777', 'step': 4503, 'epoch': 2} {'type': 'loss', 'content': 0.004590404685586691, 'timestamp': '2025-09-30 23:08:01.979947', 'step': 4504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.031948', 'step': 4504, 'epoch': 2} {'type': 'loss', 'content': 0.000649908441118896, 'timestamp': '2025-09-30 23:08:02.034394', 'step': 4505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:08:02.087189', 'step': 4505, 'epoch': 2} {'type': 'loss', 'content': 0.0065277512185275555, 'timestamp': '2025-09-30 23:08:02.089312', 'step': 4506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.143969', 'step': 4506, 'epoch': 2} {'type': 'loss', 'content': 0.0007220452534966171, 'timestamp': '2025-09-30 23:08:02.146225', 'step': 4507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.201663', 'step': 4507, 'epoch': 2} {'type': 'loss', 'content': 0.0920780822634697, 'timestamp': '2025-09-30 23:08:02.208798', 'step': 4508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.264495', 'step': 4508, 'epoch': 2} {'type': 'loss', 'content': 0.03265460208058357, 'timestamp': '2025-09-30 23:08:02.266619', 'step': 4509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.320061', 'step': 4509, 'epoch': 2} {'type': 'loss', 'content': 0.01666535623371601, 'timestamp': '2025-09-30 23:08:02.322716', 'step': 4510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:02.385336', 'step': 4510, 'epoch': 2} {'type': 'loss', 'content': 0.0011069276370108128, 'timestamp': '2025-09-30 23:08:02.387912', 'step': 4511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.450887', 'step': 4511, 'epoch': 2} {'type': 'loss', 'content': 0.0066626304760575294, 'timestamp': '2025-09-30 23:08:02.456859', 'step': 4512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.518909', 'step': 4512, 'epoch': 2} {'type': 'loss', 'content': 0.002850291086360812, 'timestamp': '2025-09-30 23:08:02.521170', 'step': 4513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.578649', 'step': 4513, 'epoch': 2} {'type': 'loss', 'content': 0.009519451297819614, 'timestamp': '2025-09-30 23:08:02.580702', 'step': 4514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.633470', 'step': 4514, 'epoch': 2} {'type': 'loss', 'content': 0.00017408312123734504, 'timestamp': '2025-09-30 23:08:02.635546', 'step': 4515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:02.687366', 'step': 4515, 'epoch': 2} {'type': 'loss', 'content': 0.026726601645350456, 'timestamp': '2025-09-30 23:08:02.692958', 'step': 4516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.745893', 'step': 4516, 'epoch': 2} {'type': 'loss', 'content': 0.008598914369940758, 'timestamp': '2025-09-30 23:08:02.748346', 'step': 4517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.801558', 'step': 4517, 'epoch': 2} {'type': 'loss', 'content': 0.013773651793599129, 'timestamp': '2025-09-30 23:08:02.804140', 'step': 4518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.856261', 'step': 4518, 'epoch': 2} {'type': 'loss', 'content': 0.001779138226993382, 'timestamp': '2025-09-30 23:08:02.859033', 'step': 4519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:02.912427', 'step': 4519, 'epoch': 2} {'type': 'loss', 'content': 0.0029050398152321577, 'timestamp': '2025-09-30 23:08:02.918699', 'step': 4520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:02.970214', 'step': 4520, 'epoch': 2} {'type': 'loss', 'content': 0.000263696740148589, 'timestamp': '2025-09-30 23:08:02.972556', 'step': 4521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.026761', 'step': 4521, 'epoch': 2} {'type': 'loss', 'content': 0.0023250689264386892, 'timestamp': '2025-09-30 23:08:03.029093', 'step': 4522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.081051', 'step': 4522, 'epoch': 2} {'type': 'loss', 'content': 0.041089024394750595, 'timestamp': '2025-09-30 23:08:03.083397', 'step': 4523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:03.136582', 'step': 4523, 'epoch': 2} {'type': 'loss', 'content': 0.00023676645650994033, 'timestamp': '2025-09-30 23:08:03.142234', 'step': 4524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:03.193291', 'step': 4524, 'epoch': 2} {'type': 'loss', 'content': 0.03316250443458557, 'timestamp': '2025-09-30 23:08:03.195948', 'step': 4525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.251478', 'step': 4525, 'epoch': 2} {'type': 'loss', 'content': 0.006085801403969526, 'timestamp': '2025-09-30 23:08:03.254147', 'step': 4526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.307141', 'step': 4526, 'epoch': 2} {'type': 'loss', 'content': 0.01851208135485649, 'timestamp': '2025-09-30 23:08:03.309776', 'step': 4527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.361590', 'step': 4527, 'epoch': 2} {'type': 'loss', 'content': 0.012338362634181976, 'timestamp': '2025-09-30 23:08:03.367296', 'step': 4528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:03.419192', 'step': 4528, 'epoch': 2} {'type': 'loss', 'content': 0.003061672206968069, 'timestamp': '2025-09-30 23:08:03.421505', 'step': 4529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:03.473634', 'step': 4529, 'epoch': 2} {'type': 'loss', 'content': 0.0028764440212398767, 'timestamp': '2025-09-30 23:08:03.476266', 'step': 4530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.528776', 'step': 4530, 'epoch': 2} {'type': 'loss', 'content': 0.026616394519805908, 'timestamp': '2025-09-30 23:08:03.531353', 'step': 4531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.583561', 'step': 4531, 'epoch': 2} {'type': 'loss', 'content': 0.007676075212657452, 'timestamp': '2025-09-30 23:08:03.589584', 'step': 4532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:03.640984', 'step': 4532, 'epoch': 2} {'type': 'loss', 'content': 0.0029336309526115656, 'timestamp': '2025-09-30 23:08:03.643463', 'step': 4533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.695901', 'step': 4533, 'epoch': 2} {'type': 'loss', 'content': 0.036160413175821304, 'timestamp': '2025-09-30 23:08:03.698016', 'step': 4534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.750848', 'step': 4534, 'epoch': 2} {'type': 'loss', 'content': 0.006582851521670818, 'timestamp': '2025-09-30 23:08:03.753470', 'step': 4535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.805611', 'step': 4535, 'epoch': 2} {'type': 'loss', 'content': 0.00035916498745791614, 'timestamp': '2025-09-30 23:08:03.811668', 'step': 4536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.866529', 'step': 4536, 'epoch': 2} {'type': 'loss', 'content': 0.008819667622447014, 'timestamp': '2025-09-30 23:08:03.877334', 'step': 4537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:03.949210', 'step': 4537, 'epoch': 2} {'type': 'loss', 'content': 0.0018630193080753088, 'timestamp': '2025-09-30 23:08:03.951846', 'step': 4538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:04.004015', 'step': 4538, 'epoch': 2} {'type': 'loss', 'content': 0.0464123860001564, 'timestamp': '2025-09-30 23:08:04.006606', 'step': 4539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:04.058555', 'step': 4539, 'epoch': 2} {'type': 'loss', 'content': 0.0008883283589966595, 'timestamp': '2025-09-30 23:08:04.064498', 'step': 4540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.116780', 'step': 4540, 'epoch': 2} {'type': 'loss', 'content': 0.005109682213515043, 'timestamp': '2025-09-30 23:08:04.120135', 'step': 4541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.172574', 'step': 4541, 'epoch': 2} {'type': 'loss', 'content': 0.0051873186603188515, 'timestamp': '2025-09-30 23:08:04.176028', 'step': 4542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.230373', 'step': 4542, 'epoch': 2} {'type': 'loss', 'content': 0.0003063031181227416, 'timestamp': '2025-09-30 23:08:04.233011', 'step': 4543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:04.290923', 'step': 4543, 'epoch': 2} {'type': 'loss', 'content': 0.004031138960272074, 'timestamp': '2025-09-30 23:08:04.296665', 'step': 4544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.348037', 'step': 4544, 'epoch': 2} {'type': 'loss', 'content': 0.011046817526221275, 'timestamp': '2025-09-30 23:08:04.350700', 'step': 4545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.403199', 'step': 4545, 'epoch': 2} {'type': 'loss', 'content': 0.06235837936401367, 'timestamp': '2025-09-30 23:08:04.405641', 'step': 4546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.461285', 'step': 4546, 'epoch': 2} {'type': 'loss', 'content': 0.0010685193119570613, 'timestamp': '2025-09-30 23:08:04.463693', 'step': 4547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.515503', 'step': 4547, 'epoch': 2} {'type': 'loss', 'content': 0.032915499061346054, 'timestamp': '2025-09-30 23:08:04.521501', 'step': 4548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:04.574078', 'step': 4548, 'epoch': 2} {'type': 'loss', 'content': 0.002241789596155286, 'timestamp': '2025-09-30 23:08:04.576558', 'step': 4549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.628774', 'step': 4549, 'epoch': 2} {'type': 'loss', 'content': 0.0018003428122028708, 'timestamp': '2025-09-30 23:08:04.631086', 'step': 4550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.682773', 'step': 4550, 'epoch': 2} {'type': 'loss', 'content': 0.025347372516989708, 'timestamp': '2025-09-30 23:08:04.685090', 'step': 4551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.738770', 'step': 4551, 'epoch': 2} {'type': 'loss', 'content': 0.027056340128183365, 'timestamp': '2025-09-30 23:08:04.744591', 'step': 4552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.796261', 'step': 4552, 'epoch': 2} {'type': 'loss', 'content': 0.0003162229841109365, 'timestamp': '2025-09-30 23:08:04.801028', 'step': 4553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:04.858966', 'step': 4553, 'epoch': 2} {'type': 'loss', 'content': 0.0021760067902505398, 'timestamp': '2025-09-30 23:08:04.861232', 'step': 4554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.914007', 'step': 4554, 'epoch': 2} {'type': 'loss', 'content': 0.02241465263068676, 'timestamp': '2025-09-30 23:08:04.916260', 'step': 4555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:04.969443', 'step': 4555, 'epoch': 2} {'type': 'loss', 'content': 0.01950056478381157, 'timestamp': '2025-09-30 23:08:04.975350', 'step': 4556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:05.027769', 'step': 4556, 'epoch': 2} {'type': 'loss', 'content': 0.02456807717680931, 'timestamp': '2025-09-30 23:08:05.030521', 'step': 4557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:05.083448', 'step': 4557, 'epoch': 2} {'type': 'loss', 'content': 0.0029473837930709124, 'timestamp': '2025-09-30 23:08:05.085779', 'step': 4558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:05.138757', 'step': 4558, 'epoch': 2} {'type': 'loss', 'content': 0.013349506072700024, 'timestamp': '2025-09-30 23:08:05.141180', 'step': 4559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:05.193802', 'step': 4559, 'epoch': 2} {'type': 'loss', 'content': 0.0034747987519949675, 'timestamp': '2025-09-30 23:08:05.200729', 'step': 4560, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:08:08.730639', 'step': 4560, 'epoch': 2} {'type': 'pplx', 'content': 8183947.311951997, 'timestamp': '2025-09-30 23:08:08.733968', 'step': 4560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:08.785822', 'step': 4560, 'epoch': 2} {'type': 'loss', 'content': 0.00546998018398881, 'timestamp': '2025-09-30 23:08:08.789378', 'step': 4561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:08.844606', 'step': 4561, 'epoch': 2} {'type': 'loss', 'content': 0.00025188917061313987, 'timestamp': '2025-09-30 23:08:08.847525', 'step': 4562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:08.906490', 'step': 4562, 'epoch': 2} {'type': 'loss', 'content': 0.0009386492893099785, 'timestamp': '2025-09-30 23:08:08.908849', 'step': 4563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:08.965572', 'step': 4563, 'epoch': 2} {'type': 'loss', 'content': 0.0042730369605124, 'timestamp': '2025-09-30 23:08:08.972364', 'step': 4564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:09.028065', 'step': 4564, 'epoch': 2} {'type': 'loss', 'content': 0.05228195711970329, 'timestamp': '2025-09-30 23:08:09.030773', 'step': 4565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.085764', 'step': 4565, 'epoch': 2} {'type': 'loss', 'content': 0.0008845357224345207, 'timestamp': '2025-09-30 23:08:09.089777', 'step': 4566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.145629', 'step': 4566, 'epoch': 2} {'type': 'loss', 'content': 0.0003383361326996237, 'timestamp': '2025-09-30 23:08:09.148533', 'step': 4567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:09.202121', 'step': 4567, 'epoch': 2} {'type': 'loss', 'content': 0.016383331269025803, 'timestamp': '2025-09-30 23:08:09.208525', 'step': 4568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.264870', 'step': 4568, 'epoch': 2} {'type': 'loss', 'content': 0.024939564988017082, 'timestamp': '2025-09-30 23:08:09.269545', 'step': 4569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:09.329199', 'step': 4569, 'epoch': 2} {'type': 'loss', 'content': 0.005249573849141598, 'timestamp': '2025-09-30 23:08:09.333295', 'step': 4570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.393394', 'step': 4570, 'epoch': 2} {'type': 'loss', 'content': 0.002441574353724718, 'timestamp': '2025-09-30 23:08:09.396569', 'step': 4571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:09.452545', 'step': 4571, 'epoch': 2} {'type': 'loss', 'content': 0.007998877204954624, 'timestamp': '2025-09-30 23:08:09.458960', 'step': 4572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.513200', 'step': 4572, 'epoch': 2} {'type': 'loss', 'content': 0.0004499823844525963, 'timestamp': '2025-09-30 23:08:09.516141', 'step': 4573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.571357', 'step': 4573, 'epoch': 2} {'type': 'loss', 'content': 0.005977503955364227, 'timestamp': '2025-09-30 23:08:09.574092', 'step': 4574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.628767', 'step': 4574, 'epoch': 2} {'type': 'loss', 'content': 0.005588748026639223, 'timestamp': '2025-09-30 23:08:09.631202', 'step': 4575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.687470', 'step': 4575, 'epoch': 2} {'type': 'loss', 'content': 0.007261520717293024, 'timestamp': '2025-09-30 23:08:09.695025', 'step': 4576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.749023', 'step': 4576, 'epoch': 2} {'type': 'loss', 'content': 0.012565048411488533, 'timestamp': '2025-09-30 23:08:09.751270', 'step': 4577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.805973', 'step': 4577, 'epoch': 2} {'type': 'loss', 'content': 0.0050188591703772545, 'timestamp': '2025-09-30 23:08:09.809478', 'step': 4578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:09.866689', 'step': 4578, 'epoch': 2} {'type': 'loss', 'content': 0.0005612843087874353, 'timestamp': '2025-09-30 23:08:09.869671', 'step': 4579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.926853', 'step': 4579, 'epoch': 2} {'type': 'loss', 'content': 0.0043153283186256886, 'timestamp': '2025-09-30 23:08:09.932421', 'step': 4580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:09.984315', 'step': 4580, 'epoch': 2} {'type': 'loss', 'content': 0.00047373579582199454, 'timestamp': '2025-09-30 23:08:09.986637', 'step': 4581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.041151', 'step': 4581, 'epoch': 2} {'type': 'loss', 'content': 0.0043996297754347324, 'timestamp': '2025-09-30 23:08:10.043505', 'step': 4582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.096881', 'step': 4582, 'epoch': 2} {'type': 'loss', 'content': 0.012115584686398506, 'timestamp': '2025-09-30 23:08:10.099058', 'step': 4583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.151529', 'step': 4583, 'epoch': 2} {'type': 'loss', 'content': 0.01999322697520256, 'timestamp': '2025-09-30 23:08:10.157446', 'step': 4584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.209432', 'step': 4584, 'epoch': 2} {'type': 'loss', 'content': 0.029406309127807617, 'timestamp': '2025-09-30 23:08:10.211877', 'step': 4585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.264026', 'step': 4585, 'epoch': 2} {'type': 'loss', 'content': 0.006149719934910536, 'timestamp': '2025-09-30 23:08:10.267434', 'step': 4586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:10.323879', 'step': 4586, 'epoch': 2} {'type': 'loss', 'content': 0.045118920505046844, 'timestamp': '2025-09-30 23:08:10.325978', 'step': 4587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.381113', 'step': 4587, 'epoch': 2} {'type': 'loss', 'content': 0.004079700447618961, 'timestamp': '2025-09-30 23:08:10.386962', 'step': 4588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.439710', 'step': 4588, 'epoch': 2} {'type': 'loss', 'content': 0.001414387603290379, 'timestamp': '2025-09-30 23:08:10.441896', 'step': 4589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.496217', 'step': 4589, 'epoch': 2} {'type': 'loss', 'content': 0.03073844313621521, 'timestamp': '2025-09-30 23:08:10.498653', 'step': 4590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.552608', 'step': 4590, 'epoch': 2} {'type': 'loss', 'content': 0.009540384635329247, 'timestamp': '2025-09-30 23:08:10.556090', 'step': 4591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.609280', 'step': 4591, 'epoch': 2} {'type': 'loss', 'content': 0.030209938064217567, 'timestamp': '2025-09-30 23:08:10.615173', 'step': 4592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.667399', 'step': 4592, 'epoch': 2} {'type': 'loss', 'content': 0.019949639216065407, 'timestamp': '2025-09-30 23:08:10.669818', 'step': 4593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.723955', 'step': 4593, 'epoch': 2} {'type': 'loss', 'content': 0.02272777259349823, 'timestamp': '2025-09-30 23:08:10.726308', 'step': 4594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.779474', 'step': 4594, 'epoch': 2} {'type': 'loss', 'content': 0.008792840875685215, 'timestamp': '2025-09-30 23:08:10.781971', 'step': 4595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.834311', 'step': 4595, 'epoch': 2} {'type': 'loss', 'content': 0.00032511603785678744, 'timestamp': '2025-09-30 23:08:10.840594', 'step': 4596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.898835', 'step': 4596, 'epoch': 2} {'type': 'loss', 'content': 0.0009093816624954343, 'timestamp': '2025-09-30 23:08:10.901385', 'step': 4597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:10.955187', 'step': 4597, 'epoch': 2} {'type': 'loss', 'content': 0.0024133252445608377, 'timestamp': '2025-09-30 23:08:10.957489', 'step': 4598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.010270', 'step': 4598, 'epoch': 2} {'type': 'loss', 'content': 0.0043945107609033585, 'timestamp': '2025-09-30 23:08:11.015082', 'step': 4599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.067973', 'step': 4599, 'epoch': 2} {'type': 'loss', 'content': 0.000549536373000592, 'timestamp': '2025-09-30 23:08:11.074105', 'step': 4600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.126008', 'step': 4600, 'epoch': 2} {'type': 'loss', 'content': 0.011008906178176403, 'timestamp': '2025-09-30 23:08:11.128295', 'step': 4601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.180703', 'step': 4601, 'epoch': 2} {'type': 'loss', 'content': 0.025399914011359215, 'timestamp': '2025-09-30 23:08:11.183031', 'step': 4602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.235831', 'step': 4602, 'epoch': 2} {'type': 'loss', 'content': 0.021481236442923546, 'timestamp': '2025-09-30 23:08:11.239133', 'step': 4603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.295373', 'step': 4603, 'epoch': 2} {'type': 'loss', 'content': 0.002062847139313817, 'timestamp': '2025-09-30 23:08:11.301887', 'step': 4604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.357640', 'step': 4604, 'epoch': 2} {'type': 'loss', 'content': 0.029869521036744118, 'timestamp': '2025-09-30 23:08:11.359866', 'step': 4605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.412401', 'step': 4605, 'epoch': 2} {'type': 'loss', 'content': 0.004561923444271088, 'timestamp': '2025-09-30 23:08:11.414850', 'step': 4606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.467607', 'step': 4606, 'epoch': 2} {'type': 'loss', 'content': 0.0005458981031551957, 'timestamp': '2025-09-30 23:08:11.470365', 'step': 4607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.526191', 'step': 4607, 'epoch': 2} {'type': 'loss', 'content': 0.002890648553147912, 'timestamp': '2025-09-30 23:08:11.532122', 'step': 4608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.584740', 'step': 4608, 'epoch': 2} {'type': 'loss', 'content': 0.004588738549500704, 'timestamp': '2025-09-30 23:08:11.589056', 'step': 4609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:11.643059', 'step': 4609, 'epoch': 2} {'type': 'loss', 'content': 0.009296889416873455, 'timestamp': '2025-09-30 23:08:11.646949', 'step': 4610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.699564', 'step': 4610, 'epoch': 2} {'type': 'loss', 'content': 0.003661269089207053, 'timestamp': '2025-09-30 23:08:11.702065', 'step': 4611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.763599', 'step': 4611, 'epoch': 2} {'type': 'loss', 'content': 0.0016282473225146532, 'timestamp': '2025-09-30 23:08:11.769526', 'step': 4612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:11.823343', 'step': 4612, 'epoch': 2} {'type': 'loss', 'content': 0.0007796023273840547, 'timestamp': '2025-09-30 23:08:11.826464', 'step': 4613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.880778', 'step': 4613, 'epoch': 2} {'type': 'loss', 'content': 0.012363244779407978, 'timestamp': '2025-09-30 23:08:11.882971', 'step': 4614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:11.944823', 'step': 4614, 'epoch': 2} {'type': 'loss', 'content': 0.0052408925257623196, 'timestamp': '2025-09-30 23:08:11.947014', 'step': 4615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.000136', 'step': 4615, 'epoch': 2} {'type': 'loss', 'content': 0.0006658288766629994, 'timestamp': '2025-09-30 23:08:12.005937', 'step': 4616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.057877', 'step': 4616, 'epoch': 2} {'type': 'loss', 'content': 0.00037326800520531833, 'timestamp': '2025-09-30 23:08:12.060660', 'step': 4617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.120823', 'step': 4617, 'epoch': 2} {'type': 'loss', 'content': 0.10282453149557114, 'timestamp': '2025-09-30 23:08:12.123407', 'step': 4618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:12.177987', 'step': 4618, 'epoch': 2} {'type': 'loss', 'content': 0.0016050559934228659, 'timestamp': '2025-09-30 23:08:12.180395', 'step': 4619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.239011', 'step': 4619, 'epoch': 2} {'type': 'loss', 'content': 0.003290252760052681, 'timestamp': '2025-09-30 23:08:12.244739', 'step': 4620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.300291', 'step': 4620, 'epoch': 2} {'type': 'loss', 'content': 0.001112974714487791, 'timestamp': '2025-09-30 23:08:12.305281', 'step': 4621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.367211', 'step': 4621, 'epoch': 2} {'type': 'loss', 'content': 0.007695081643760204, 'timestamp': '2025-09-30 23:08:12.370326', 'step': 4622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:12.430599', 'step': 4622, 'epoch': 2} {'type': 'loss', 'content': 0.009851655922830105, 'timestamp': '2025-09-30 23:08:12.436954', 'step': 4623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.490658', 'step': 4623, 'epoch': 2} {'type': 'loss', 'content': 0.019896168261766434, 'timestamp': '2025-09-30 23:08:12.496352', 'step': 4624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.552290', 'step': 4624, 'epoch': 2} {'type': 'loss', 'content': 0.007561735808849335, 'timestamp': '2025-09-30 23:08:12.554959', 'step': 4625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.608507', 'step': 4625, 'epoch': 2} {'type': 'loss', 'content': 0.00016248946485575289, 'timestamp': '2025-09-30 23:08:12.610898', 'step': 4626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.667936', 'step': 4626, 'epoch': 2} {'type': 'loss', 'content': 0.00036324982647784054, 'timestamp': '2025-09-30 23:08:12.673083', 'step': 4627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.726374', 'step': 4627, 'epoch': 2} {'type': 'loss', 'content': 0.002157915383577347, 'timestamp': '2025-09-30 23:08:12.733295', 'step': 4628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.792345', 'step': 4628, 'epoch': 2} {'type': 'loss', 'content': 0.005965102929621935, 'timestamp': '2025-09-30 23:08:12.798814', 'step': 4629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.851693', 'step': 4629, 'epoch': 2} {'type': 'loss', 'content': 0.01196583453565836, 'timestamp': '2025-09-30 23:08:12.853928', 'step': 4630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:12.906152', 'step': 4630, 'epoch': 2} {'type': 'loss', 'content': 0.014591039158403873, 'timestamp': '2025-09-30 23:08:12.908525', 'step': 4631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:12.961610', 'step': 4631, 'epoch': 2} {'type': 'loss', 'content': 0.023968713358044624, 'timestamp': '2025-09-30 23:08:12.967478', 'step': 4632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:13.020003', 'step': 4632, 'epoch': 2} {'type': 'loss', 'content': 0.004651193041354418, 'timestamp': '2025-09-30 23:08:13.022388', 'step': 4633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:13.075086', 'step': 4633, 'epoch': 2} {'type': 'loss', 'content': 0.005807935260236263, 'timestamp': '2025-09-30 23:08:13.079004', 'step': 4634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.131764', 'step': 4634, 'epoch': 2} {'type': 'loss', 'content': 0.001912916311994195, 'timestamp': '2025-09-30 23:08:13.135390', 'step': 4635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.190975', 'step': 4635, 'epoch': 2} {'type': 'loss', 'content': 0.018524277955293655, 'timestamp': '2025-09-30 23:08:13.197354', 'step': 4636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.249951', 'step': 4636, 'epoch': 2} {'type': 'loss', 'content': 0.0012933692196384072, 'timestamp': '2025-09-30 23:08:13.253387', 'step': 4637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.309317', 'step': 4637, 'epoch': 2} {'type': 'loss', 'content': 0.005682331509888172, 'timestamp': '2025-09-30 23:08:13.311652', 'step': 4638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.364209', 'step': 4638, 'epoch': 2} {'type': 'loss', 'content': 0.0540509857237339, 'timestamp': '2025-09-30 23:08:13.366428', 'step': 4639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:13.418951', 'step': 4639, 'epoch': 2} {'type': 'loss', 'content': 0.0055153644643723965, 'timestamp': '2025-09-30 23:08:13.424815', 'step': 4640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.487242', 'step': 4640, 'epoch': 2} {'type': 'loss', 'content': 0.050265517085790634, 'timestamp': '2025-09-30 23:08:13.489654', 'step': 4641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.543309', 'step': 4641, 'epoch': 2} {'type': 'loss', 'content': 0.04783881455659866, 'timestamp': '2025-09-30 23:08:13.545564', 'step': 4642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.602296', 'step': 4642, 'epoch': 2} {'type': 'loss', 'content': 0.0005007548606954515, 'timestamp': '2025-09-30 23:08:13.606044', 'step': 4643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:13.659186', 'step': 4643, 'epoch': 2} {'type': 'loss', 'content': 0.005702285561710596, 'timestamp': '2025-09-30 23:08:13.665015', 'step': 4644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:13.717274', 'step': 4644, 'epoch': 2} {'type': 'loss', 'content': 0.012449209578335285, 'timestamp': '2025-09-30 23:08:13.719769', 'step': 4645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.772884', 'step': 4645, 'epoch': 2} {'type': 'loss', 'content': 0.010947798378765583, 'timestamp': '2025-09-30 23:08:13.778283', 'step': 4646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.835573', 'step': 4646, 'epoch': 2} {'type': 'loss', 'content': 0.00507371174171567, 'timestamp': '2025-09-30 23:08:13.838030', 'step': 4647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.895483', 'step': 4647, 'epoch': 2} {'type': 'loss', 'content': 0.0008727356907911599, 'timestamp': '2025-09-30 23:08:13.901518', 'step': 4648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:13.959039', 'step': 4648, 'epoch': 2} {'type': 'loss', 'content': 0.04624691233038902, 'timestamp': '2025-09-30 23:08:13.961903', 'step': 4649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.016681', 'step': 4649, 'epoch': 2} {'type': 'loss', 'content': 0.0034954624716192484, 'timestamp': '2025-09-30 23:08:14.018723', 'step': 4650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.071878', 'step': 4650, 'epoch': 2} {'type': 'loss', 'content': 0.0004091103910468519, 'timestamp': '2025-09-30 23:08:14.074369', 'step': 4651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.127440', 'step': 4651, 'epoch': 2} {'type': 'loss', 'content': 0.03867156058549881, 'timestamp': '2025-09-30 23:08:14.133425', 'step': 4652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:14.190154', 'step': 4652, 'epoch': 2} {'type': 'loss', 'content': 0.0035364036448299885, 'timestamp': '2025-09-30 23:08:14.192430', 'step': 4653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.245044', 'step': 4653, 'epoch': 2} {'type': 'loss', 'content': 0.010368330404162407, 'timestamp': '2025-09-30 23:08:14.247375', 'step': 4654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:14.300868', 'step': 4654, 'epoch': 2} {'type': 'loss', 'content': 0.02958741970360279, 'timestamp': '2025-09-30 23:08:14.303397', 'step': 4655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:14.355460', 'step': 4655, 'epoch': 2} {'type': 'loss', 'content': 0.001753641408868134, 'timestamp': '2025-09-30 23:08:14.361636', 'step': 4656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.415222', 'step': 4656, 'epoch': 2} {'type': 'loss', 'content': 0.016148632392287254, 'timestamp': '2025-09-30 23:08:14.418950', 'step': 4657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.471823', 'step': 4657, 'epoch': 2} {'type': 'loss', 'content': 0.0005769552662968636, 'timestamp': '2025-09-30 23:08:14.474146', 'step': 4658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.529009', 'step': 4658, 'epoch': 2} {'type': 'loss', 'content': 0.005095383618026972, 'timestamp': '2025-09-30 23:08:14.531549', 'step': 4659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.584243', 'step': 4659, 'epoch': 2} {'type': 'loss', 'content': 0.009607668034732342, 'timestamp': '2025-09-30 23:08:14.590154', 'step': 4660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.641945', 'step': 4660, 'epoch': 2} {'type': 'loss', 'content': 0.03053543157875538, 'timestamp': '2025-09-30 23:08:14.644206', 'step': 4661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.697311', 'step': 4661, 'epoch': 2} {'type': 'loss', 'content': 0.01623288355767727, 'timestamp': '2025-09-30 23:08:14.700149', 'step': 4662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.756131', 'step': 4662, 'epoch': 2} {'type': 'loss', 'content': 0.02052086964249611, 'timestamp': '2025-09-30 23:08:14.758396', 'step': 4663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.811299', 'step': 4663, 'epoch': 2} {'type': 'loss', 'content': 0.0035978013183921576, 'timestamp': '2025-09-30 23:08:14.816878', 'step': 4664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.870577', 'step': 4664, 'epoch': 2} {'type': 'loss', 'content': 0.014943404123187065, 'timestamp': '2025-09-30 23:08:14.872848', 'step': 4665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.926660', 'step': 4665, 'epoch': 2} {'type': 'loss', 'content': 0.0009524609195068479, 'timestamp': '2025-09-30 23:08:14.930226', 'step': 4666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:14.985407', 'step': 4666, 'epoch': 2} {'type': 'loss', 'content': 0.02459990419447422, 'timestamp': '2025-09-30 23:08:14.987642', 'step': 4667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.040905', 'step': 4667, 'epoch': 2} {'type': 'loss', 'content': 0.002206050790846348, 'timestamp': '2025-09-30 23:08:15.047619', 'step': 4668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.099630', 'step': 4668, 'epoch': 2} {'type': 'loss', 'content': 0.04093315824866295, 'timestamp': '2025-09-30 23:08:15.102303', 'step': 4669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.154886', 'step': 4669, 'epoch': 2} {'type': 'loss', 'content': 0.008336080238223076, 'timestamp': '2025-09-30 23:08:15.157291', 'step': 4670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.211008', 'step': 4670, 'epoch': 2} {'type': 'loss', 'content': 0.009445572271943092, 'timestamp': '2025-09-30 23:08:15.213353', 'step': 4671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.266194', 'step': 4671, 'epoch': 2} {'type': 'loss', 'content': 0.04236976429820061, 'timestamp': '2025-09-30 23:08:15.271876', 'step': 4672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.324474', 'step': 4672, 'epoch': 2} {'type': 'loss', 'content': 0.025383038446307182, 'timestamp': '2025-09-30 23:08:15.326648', 'step': 4673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.379252', 'step': 4673, 'epoch': 2} {'type': 'loss', 'content': 0.013463241048157215, 'timestamp': '2025-09-30 23:08:15.381653', 'step': 4674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.435493', 'step': 4674, 'epoch': 2} {'type': 'loss', 'content': 0.013972034677863121, 'timestamp': '2025-09-30 23:08:15.437825', 'step': 4675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.491156', 'step': 4675, 'epoch': 2} {'type': 'loss', 'content': 0.001955019775778055, 'timestamp': '2025-09-30 23:08:15.496904', 'step': 4676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.548999', 'step': 4676, 'epoch': 2} {'type': 'loss', 'content': 0.008307593874633312, 'timestamp': '2025-09-30 23:08:15.551342', 'step': 4677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.603789', 'step': 4677, 'epoch': 2} {'type': 'loss', 'content': 0.010345490649342537, 'timestamp': '2025-09-30 23:08:15.606163', 'step': 4678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.658904', 'step': 4678, 'epoch': 2} {'type': 'loss', 'content': 0.011263257823884487, 'timestamp': '2025-09-30 23:08:15.661235', 'step': 4679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.716179', 'step': 4679, 'epoch': 2} {'type': 'loss', 'content': 0.005146931391209364, 'timestamp': '2025-09-30 23:08:15.721890', 'step': 4680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.776194', 'step': 4680, 'epoch': 2} {'type': 'loss', 'content': 0.001581163378432393, 'timestamp': '2025-09-30 23:08:15.778428', 'step': 4681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.832337', 'step': 4681, 'epoch': 2} {'type': 'loss', 'content': 0.0685245469212532, 'timestamp': '2025-09-30 23:08:15.834524', 'step': 4682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.887184', 'step': 4682, 'epoch': 2} {'type': 'loss', 'content': 0.02181093581020832, 'timestamp': '2025-09-30 23:08:15.889236', 'step': 4683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:15.942562', 'step': 4683, 'epoch': 2} {'type': 'loss', 'content': 0.012555132620036602, 'timestamp': '2025-09-30 23:08:15.948497', 'step': 4684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.000638', 'step': 4684, 'epoch': 2} {'type': 'loss', 'content': 0.005055964458733797, 'timestamp': '2025-09-30 23:08:16.003748', 'step': 4685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:16.056480', 'step': 4685, 'epoch': 2} {'type': 'loss', 'content': 0.0006583466893061996, 'timestamp': '2025-09-30 23:08:16.058718', 'step': 4686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.111439', 'step': 4686, 'epoch': 2} {'type': 'loss', 'content': 0.01212206482887268, 'timestamp': '2025-09-30 23:08:16.113846', 'step': 4687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:16.167409', 'step': 4687, 'epoch': 2} {'type': 'loss', 'content': 0.0017646660562604666, 'timestamp': '2025-09-30 23:08:16.173346', 'step': 4688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.225939', 'step': 4688, 'epoch': 2} {'type': 'loss', 'content': 0.034616339951753616, 'timestamp': '2025-09-30 23:08:16.228271', 'step': 4689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.281765', 'step': 4689, 'epoch': 2} {'type': 'loss', 'content': 0.04108281806111336, 'timestamp': '2025-09-30 23:08:16.283950', 'step': 4690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.337027', 'step': 4690, 'epoch': 2} {'type': 'loss', 'content': 0.0029262122698128223, 'timestamp': '2025-09-30 23:08:16.339488', 'step': 4691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:16.392001', 'step': 4691, 'epoch': 2} {'type': 'loss', 'content': 0.05713367089629173, 'timestamp': '2025-09-30 23:08:16.397860', 'step': 4692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:16.450805', 'step': 4692, 'epoch': 2} {'type': 'loss', 'content': 0.0011199290165677667, 'timestamp': '2025-09-30 23:08:16.456716', 'step': 4693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.509557', 'step': 4693, 'epoch': 2} {'type': 'loss', 'content': 0.05846729874610901, 'timestamp': '2025-09-30 23:08:16.512063', 'step': 4694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:16.565591', 'step': 4694, 'epoch': 2} {'type': 'loss', 'content': 0.0034980401396751404, 'timestamp': '2025-09-30 23:08:16.567900', 'step': 4695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:16.620636', 'step': 4695, 'epoch': 2} {'type': 'loss', 'content': 0.006434544920921326, 'timestamp': '2025-09-30 23:08:16.626487', 'step': 4696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.679047', 'step': 4696, 'epoch': 2} {'type': 'loss', 'content': 0.01075405441224575, 'timestamp': '2025-09-30 23:08:16.681717', 'step': 4697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.734369', 'step': 4697, 'epoch': 2} {'type': 'loss', 'content': 0.016080988571047783, 'timestamp': '2025-09-30 23:08:16.736891', 'step': 4698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.789566', 'step': 4698, 'epoch': 2} {'type': 'loss', 'content': 0.01875290647149086, 'timestamp': '2025-09-30 23:08:16.792513', 'step': 4699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:16.845481', 'step': 4699, 'epoch': 2} {'type': 'loss', 'content': 0.02445163205265999, 'timestamp': '2025-09-30 23:08:16.851233', 'step': 4700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:16.903711', 'step': 4700, 'epoch': 2} {'type': 'loss', 'content': 0.007522748317569494, 'timestamp': '2025-09-30 23:08:16.906000', 'step': 4701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:16.958646', 'step': 4701, 'epoch': 2} {'type': 'loss', 'content': 0.03561665862798691, 'timestamp': '2025-09-30 23:08:16.961116', 'step': 4702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:17.014286', 'step': 4702, 'epoch': 2} {'type': 'loss', 'content': 0.03546447679400444, 'timestamp': '2025-09-30 23:08:17.017006', 'step': 4703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:17.071293', 'step': 4703, 'epoch': 2} {'type': 'loss', 'content': 0.004212338477373123, 'timestamp': '2025-09-30 23:08:17.077927', 'step': 4704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:17.131774', 'step': 4704, 'epoch': 2} {'type': 'loss', 'content': 0.04202839732170105, 'timestamp': '2025-09-30 23:08:17.134135', 'step': 4705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:17.186402', 'step': 4705, 'epoch': 2} {'type': 'loss', 'content': 0.04532995820045471, 'timestamp': '2025-09-30 23:08:17.189232', 'step': 4706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:17.242967', 'step': 4706, 'epoch': 2} {'type': 'loss', 'content': 0.014637494459748268, 'timestamp': '2025-09-30 23:08:17.245448', 'step': 4707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:17.298501', 'step': 4707, 'epoch': 2} {'type': 'loss', 'content': 0.038784828037023544, 'timestamp': '2025-09-30 23:08:17.304717', 'step': 4708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:17.357354', 'step': 4708, 'epoch': 2} {'type': 'loss', 'content': 0.008559989742934704, 'timestamp': '2025-09-30 23:08:17.359658', 'step': 4709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:17.412249', 'step': 4709, 'epoch': 2} {'type': 'loss', 'content': 0.002115595620125532, 'timestamp': '2025-09-30 23:08:17.414447', 'step': 4710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:17.467265', 'step': 4710, 'epoch': 2} {'type': 'loss', 'content': 0.021414874121546745, 'timestamp': '2025-09-30 23:08:17.469379', 'step': 4711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:17.526122', 'step': 4711, 'epoch': 2} {'type': 'loss', 'content': 0.005275689996778965, 'timestamp': '2025-09-30 23:08:17.531871', 'step': 4712, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:08:21.294786', 'step': 4712, 'epoch': 2} {'type': 'pplx', 'content': 8687239.574399216, 'timestamp': '2025-09-30 23:08:21.297237', 'step': 4712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.351245', 'step': 4712, 'epoch': 2} {'type': 'loss', 'content': 0.01238920260220766, 'timestamp': '2025-09-30 23:08:21.353736', 'step': 4713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.407220', 'step': 4713, 'epoch': 2} {'type': 'loss', 'content': 0.014601674862205982, 'timestamp': '2025-09-30 23:08:21.412718', 'step': 4714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.474921', 'step': 4714, 'epoch': 2} {'type': 'loss', 'content': 0.003040279960259795, 'timestamp': '2025-09-30 23:08:21.477184', 'step': 4715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.529789', 'step': 4715, 'epoch': 2} {'type': 'loss', 'content': 0.009836445562541485, 'timestamp': '2025-09-30 23:08:21.535805', 'step': 4716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.588928', 'step': 4716, 'epoch': 2} {'type': 'loss', 'content': 0.02468467690050602, 'timestamp': '2025-09-30 23:08:21.591455', 'step': 4717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.644295', 'step': 4717, 'epoch': 2} {'type': 'loss', 'content': 0.010704206302762032, 'timestamp': '2025-09-30 23:08:21.646668', 'step': 4718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.699269', 'step': 4718, 'epoch': 2} {'type': 'loss', 'content': 0.0024127380456775427, 'timestamp': '2025-09-30 23:08:21.701723', 'step': 4719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.754244', 'step': 4719, 'epoch': 2} {'type': 'loss', 'content': 0.02052907645702362, 'timestamp': '2025-09-30 23:08:21.759970', 'step': 4720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.811589', 'step': 4720, 'epoch': 2} {'type': 'loss', 'content': 0.007632750552147627, 'timestamp': '2025-09-30 23:08:21.813993', 'step': 4721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.866736', 'step': 4721, 'epoch': 2} {'type': 'loss', 'content': 0.013094217516481876, 'timestamp': '2025-09-30 23:08:21.869064', 'step': 4722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.921487', 'step': 4722, 'epoch': 2} {'type': 'loss', 'content': 0.016685063019394875, 'timestamp': '2025-09-30 23:08:21.924092', 'step': 4723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:21.976820', 'step': 4723, 'epoch': 2} {'type': 'loss', 'content': 0.008009245619177818, 'timestamp': '2025-09-30 23:08:21.982985', 'step': 4724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:22.035205', 'step': 4724, 'epoch': 2} {'type': 'loss', 'content': 0.020121823996305466, 'timestamp': '2025-09-30 23:08:22.037873', 'step': 4725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:22.090546', 'step': 4725, 'epoch': 2} {'type': 'loss', 'content': 0.04314210265874863, 'timestamp': '2025-09-30 23:08:22.092994', 'step': 4726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.146047', 'step': 4726, 'epoch': 2} {'type': 'loss', 'content': 0.013969727791845798, 'timestamp': '2025-09-30 23:08:22.148227', 'step': 4727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:08:22.205746', 'step': 4727, 'epoch': 2} {'type': 'loss', 'content': 0.0037942719645798206, 'timestamp': '2025-09-30 23:08:22.211558', 'step': 4728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.263810', 'step': 4728, 'epoch': 2} {'type': 'loss', 'content': 0.004145575687289238, 'timestamp': '2025-09-30 23:08:22.266148', 'step': 4729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.318621', 'step': 4729, 'epoch': 2} {'type': 'loss', 'content': 0.017158612608909607, 'timestamp': '2025-09-30 23:08:22.321033', 'step': 4730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.373801', 'step': 4730, 'epoch': 2} {'type': 'loss', 'content': 0.026379292830824852, 'timestamp': '2025-09-30 23:08:22.376116', 'step': 4731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.428804', 'step': 4731, 'epoch': 2} {'type': 'loss', 'content': 0.016238167881965637, 'timestamp': '2025-09-30 23:08:22.434769', 'step': 4732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.487215', 'step': 4732, 'epoch': 2} {'type': 'loss', 'content': 0.005099652800709009, 'timestamp': '2025-09-30 23:08:22.489367', 'step': 4733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.543433', 'step': 4733, 'epoch': 2} {'type': 'loss', 'content': 0.012646052055060863, 'timestamp': '2025-09-30 23:08:22.545937', 'step': 4734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:22.599497', 'step': 4734, 'epoch': 2} {'type': 'loss', 'content': 0.010225682519376278, 'timestamp': '2025-09-30 23:08:22.601719', 'step': 4735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.654506', 'step': 4735, 'epoch': 2} {'type': 'loss', 'content': 0.009735671803355217, 'timestamp': '2025-09-30 23:08:22.661553', 'step': 4736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.714601', 'step': 4736, 'epoch': 2} {'type': 'loss', 'content': 0.008562685921788216, 'timestamp': '2025-09-30 23:08:22.717082', 'step': 4737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.770827', 'step': 4737, 'epoch': 2} {'type': 'loss', 'content': 0.002156525384634733, 'timestamp': '2025-09-30 23:08:22.773091', 'step': 4738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.825931', 'step': 4738, 'epoch': 2} {'type': 'loss', 'content': 0.03681305795907974, 'timestamp': '2025-09-30 23:08:22.828340', 'step': 4739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.880798', 'step': 4739, 'epoch': 2} {'type': 'loss', 'content': 0.03216704726219177, 'timestamp': '2025-09-30 23:08:22.889824', 'step': 4740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:22.951890', 'step': 4740, 'epoch': 2} {'type': 'loss', 'content': 0.0380021296441555, 'timestamp': '2025-09-30 23:08:22.954239', 'step': 4741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.007933', 'step': 4741, 'epoch': 2} {'type': 'loss', 'content': 0.003404016140848398, 'timestamp': '2025-09-30 23:08:23.010187', 'step': 4742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.064908', 'step': 4742, 'epoch': 2} {'type': 'loss', 'content': 0.0019557897467166185, 'timestamp': '2025-09-30 23:08:23.067248', 'step': 4743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.120544', 'step': 4743, 'epoch': 2} {'type': 'loss', 'content': 0.001217856421135366, 'timestamp': '2025-09-30 23:08:23.126358', 'step': 4744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.178784', 'step': 4744, 'epoch': 2} {'type': 'loss', 'content': 0.0032636248506605625, 'timestamp': '2025-09-30 23:08:23.181005', 'step': 4745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.236749', 'step': 4745, 'epoch': 2} {'type': 'loss', 'content': 0.015301321633160114, 'timestamp': '2025-09-30 23:08:23.239175', 'step': 4746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.291681', 'step': 4746, 'epoch': 2} {'type': 'loss', 'content': 0.014613782986998558, 'timestamp': '2025-09-30 23:08:23.294024', 'step': 4747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.346558', 'step': 4747, 'epoch': 2} {'type': 'loss', 'content': 0.019485237076878548, 'timestamp': '2025-09-30 23:08:23.352721', 'step': 4748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:23.405248', 'step': 4748, 'epoch': 2} {'type': 'loss', 'content': 0.02733526937663555, 'timestamp': '2025-09-30 23:08:23.407524', 'step': 4749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.460562', 'step': 4749, 'epoch': 2} {'type': 'loss', 'content': 0.031577177345752716, 'timestamp': '2025-09-30 23:08:23.463204', 'step': 4750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.517666', 'step': 4750, 'epoch': 2} {'type': 'loss', 'content': 0.00033240168704651296, 'timestamp': '2025-09-30 23:08:23.520016', 'step': 4751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:23.572479', 'step': 4751, 'epoch': 2} {'type': 'loss', 'content': 0.042282991111278534, 'timestamp': '2025-09-30 23:08:23.578219', 'step': 4752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.630600', 'step': 4752, 'epoch': 2} {'type': 'loss', 'content': 0.01059426087886095, 'timestamp': '2025-09-30 23:08:23.633032', 'step': 4753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.685390', 'step': 4753, 'epoch': 2} {'type': 'loss', 'content': 0.00018038049165625125, 'timestamp': '2025-09-30 23:08:23.690748', 'step': 4754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:23.748159', 'step': 4754, 'epoch': 2} {'type': 'loss', 'content': 0.017901094630360603, 'timestamp': '2025-09-30 23:08:23.751633', 'step': 4755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.808122', 'step': 4755, 'epoch': 2} {'type': 'loss', 'content': 0.00228729285299778, 'timestamp': '2025-09-30 23:08:23.815620', 'step': 4756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.870819', 'step': 4756, 'epoch': 2} {'type': 'loss', 'content': 0.003188803791999817, 'timestamp': '2025-09-30 23:08:23.875781', 'step': 4757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.934891', 'step': 4757, 'epoch': 2} {'type': 'loss', 'content': 0.0018235718598589301, 'timestamp': '2025-09-30 23:08:23.937965', 'step': 4758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:23.995578', 'step': 4758, 'epoch': 2} {'type': 'loss', 'content': 0.015828361734747887, 'timestamp': '2025-09-30 23:08:23.999624', 'step': 4759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.056233', 'step': 4759, 'epoch': 2} {'type': 'loss', 'content': 0.012820631265640259, 'timestamp': '2025-09-30 23:08:24.064210', 'step': 4760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.123491', 'step': 4760, 'epoch': 2} {'type': 'loss', 'content': 0.00035536123323254287, 'timestamp': '2025-09-30 23:08:24.127396', 'step': 4761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.183405', 'step': 4761, 'epoch': 2} {'type': 'loss', 'content': 0.04202043637633324, 'timestamp': '2025-09-30 23:08:24.187816', 'step': 4762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.245661', 'step': 4762, 'epoch': 2} {'type': 'loss', 'content': 0.011565904133021832, 'timestamp': '2025-09-30 23:08:24.250892', 'step': 4763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.309561', 'step': 4763, 'epoch': 2} {'type': 'loss', 'content': 0.008924107067286968, 'timestamp': '2025-09-30 23:08:24.318763', 'step': 4764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.378723', 'step': 4764, 'epoch': 2} {'type': 'loss', 'content': 0.00456875329837203, 'timestamp': '2025-09-30 23:08:24.384666', 'step': 4765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.445731', 'step': 4765, 'epoch': 2} {'type': 'loss', 'content': 0.012244416400790215, 'timestamp': '2025-09-30 23:08:24.451684', 'step': 4766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.510259', 'step': 4766, 'epoch': 2} {'type': 'loss', 'content': 0.007603633217513561, 'timestamp': '2025-09-30 23:08:24.514354', 'step': 4767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:24.573332', 'step': 4767, 'epoch': 2} {'type': 'loss', 'content': 0.007222726475447416, 'timestamp': '2025-09-30 23:08:24.582063', 'step': 4768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.640943', 'step': 4768, 'epoch': 2} {'type': 'loss', 'content': 0.0052396925166249275, 'timestamp': '2025-09-30 23:08:24.647373', 'step': 4769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.708919', 'step': 4769, 'epoch': 2} {'type': 'loss', 'content': 0.02374541573226452, 'timestamp': '2025-09-30 23:08:24.712698', 'step': 4770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:24.771998', 'step': 4770, 'epoch': 2} {'type': 'loss', 'content': 0.03314400464296341, 'timestamp': '2025-09-30 23:08:24.781639', 'step': 4771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.844228', 'step': 4771, 'epoch': 2} {'type': 'loss', 'content': 0.03616654872894287, 'timestamp': '2025-09-30 23:08:24.853651', 'step': 4772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:24.934980', 'step': 4772, 'epoch': 2} {'type': 'loss', 'content': 0.025755634531378746, 'timestamp': '2025-09-30 23:08:24.953720', 'step': 4773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.049762', 'step': 4773, 'epoch': 2} {'type': 'loss', 'content': 0.009111464954912663, 'timestamp': '2025-09-30 23:08:25.058597', 'step': 4774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.130299', 'step': 4774, 'epoch': 2} {'type': 'loss', 'content': 0.002625564578920603, 'timestamp': '2025-09-30 23:08:25.142239', 'step': 4775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.215701', 'step': 4775, 'epoch': 2} {'type': 'loss', 'content': 0.011214890517294407, 'timestamp': '2025-09-30 23:08:25.232487', 'step': 4776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.311825', 'step': 4776, 'epoch': 2} {'type': 'loss', 'content': 0.0029198196716606617, 'timestamp': '2025-09-30 23:08:25.323948', 'step': 4777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.410031', 'step': 4777, 'epoch': 2} {'type': 'loss', 'content': 0.11364907026290894, 'timestamp': '2025-09-30 23:08:25.425076', 'step': 4778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.486670', 'step': 4778, 'epoch': 2} {'type': 'loss', 'content': 0.018135422840714455, 'timestamp': '2025-09-30 23:08:25.497210', 'step': 4779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:25.576807', 'step': 4779, 'epoch': 2} {'type': 'loss', 'content': 0.006161689292639494, 'timestamp': '2025-09-30 23:08:25.589645', 'step': 4780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.664545', 'step': 4780, 'epoch': 2} {'type': 'loss', 'content': 0.010973933152854443, 'timestamp': '2025-09-30 23:08:25.667625', 'step': 4781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:25.739751', 'step': 4781, 'epoch': 2} {'type': 'loss', 'content': 0.019603300839662552, 'timestamp': '2025-09-30 23:08:25.748892', 'step': 4782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:25.823260', 'step': 4782, 'epoch': 2} {'type': 'loss', 'content': 0.0043717557564377785, 'timestamp': '2025-09-30 23:08:25.833747', 'step': 4783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:25.904271', 'step': 4783, 'epoch': 2} {'type': 'loss', 'content': 0.05012453347444534, 'timestamp': '2025-09-30 23:08:25.916585', 'step': 4784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:25.991287', 'step': 4784, 'epoch': 2} {'type': 'loss', 'content': 0.017038822174072266, 'timestamp': '2025-09-30 23:08:25.994210', 'step': 4785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.072860', 'step': 4785, 'epoch': 2} {'type': 'loss', 'content': 0.011235283687710762, 'timestamp': '2025-09-30 23:08:26.079398', 'step': 4786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.140406', 'step': 4786, 'epoch': 2} {'type': 'loss', 'content': 0.002151584718376398, 'timestamp': '2025-09-30 23:08:26.150745', 'step': 4787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.229449', 'step': 4787, 'epoch': 2} {'type': 'loss', 'content': 0.002188673708587885, 'timestamp': '2025-09-30 23:08:26.237190', 'step': 4788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.305219', 'step': 4788, 'epoch': 2} {'type': 'loss', 'content': 0.0005183346220292151, 'timestamp': '2025-09-30 23:08:26.315084', 'step': 4789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:08:26.386612', 'step': 4789, 'epoch': 2} {'type': 'loss', 'content': 0.025558089837431908, 'timestamp': '2025-09-30 23:08:26.393879', 'step': 4790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.470750', 'step': 4790, 'epoch': 2} {'type': 'loss', 'content': 0.0009195757447741926, 'timestamp': '2025-09-30 23:08:26.482839', 'step': 4791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.554252', 'step': 4791, 'epoch': 2} {'type': 'loss', 'content': 0.02925143949687481, 'timestamp': '2025-09-30 23:08:26.566862', 'step': 4792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:26.638901', 'step': 4792, 'epoch': 2} {'type': 'loss', 'content': 0.003042537486180663, 'timestamp': '2025-09-30 23:08:26.647993', 'step': 4793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:26.722909', 'step': 4793, 'epoch': 2} {'type': 'loss', 'content': 0.0009959681192412972, 'timestamp': '2025-09-30 23:08:26.732130', 'step': 4794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.805439', 'step': 4794, 'epoch': 2} {'type': 'loss', 'content': 0.013571560382843018, 'timestamp': '2025-09-30 23:08:26.815258', 'step': 4795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.885533', 'step': 4795, 'epoch': 2} {'type': 'loss', 'content': 0.07066080719232559, 'timestamp': '2025-09-30 23:08:26.897460', 'step': 4796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:26.959710', 'step': 4796, 'epoch': 2} {'type': 'loss', 'content': 0.09294401854276657, 'timestamp': '2025-09-30 23:08:26.968105', 'step': 4797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.040971', 'step': 4797, 'epoch': 2} {'type': 'loss', 'content': 0.0331389382481575, 'timestamp': '2025-09-30 23:08:27.050416', 'step': 4798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.121430', 'step': 4798, 'epoch': 2} {'type': 'loss', 'content': 0.02843538299202919, 'timestamp': '2025-09-30 23:08:27.134767', 'step': 4799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.214079', 'step': 4799, 'epoch': 2} {'type': 'loss', 'content': 0.007910069078207016, 'timestamp': '2025-09-30 23:08:27.224534', 'step': 4800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.296370', 'step': 4800, 'epoch': 2} {'type': 'loss', 'content': 0.01823078654706478, 'timestamp': '2025-09-30 23:08:27.303568', 'step': 4801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.379714', 'step': 4801, 'epoch': 2} {'type': 'loss', 'content': 0.004808404948562384, 'timestamp': '2025-09-30 23:08:27.385931', 'step': 4802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.460059', 'step': 4802, 'epoch': 2} {'type': 'loss', 'content': 0.00820067711174488, 'timestamp': '2025-09-30 23:08:27.468851', 'step': 4803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.542788', 'step': 4803, 'epoch': 2} {'type': 'loss', 'content': 0.011476407758891582, 'timestamp': '2025-09-30 23:08:27.553801', 'step': 4804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.615094', 'step': 4804, 'epoch': 2} {'type': 'loss', 'content': 0.007373590487986803, 'timestamp': '2025-09-30 23:08:27.621442', 'step': 4805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.693074', 'step': 4805, 'epoch': 2} {'type': 'loss', 'content': 0.011643207632005215, 'timestamp': '2025-09-30 23:08:27.711764', 'step': 4806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.817733', 'step': 4806, 'epoch': 2} {'type': 'loss', 'content': 0.026417210698127747, 'timestamp': '2025-09-30 23:08:27.837940', 'step': 4807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:27.952282', 'step': 4807, 'epoch': 2} {'type': 'loss', 'content': 0.0008397831697948277, 'timestamp': '2025-09-30 23:08:27.975227', 'step': 4808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.068280', 'step': 4808, 'epoch': 2} {'type': 'loss', 'content': 0.011999421752989292, 'timestamp': '2025-09-30 23:08:28.081431', 'step': 4809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:28.158080', 'step': 4809, 'epoch': 2} {'type': 'loss', 'content': 0.037646204233169556, 'timestamp': '2025-09-30 23:08:28.165886', 'step': 4810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.239080', 'step': 4810, 'epoch': 2} {'type': 'loss', 'content': 0.004236442036926746, 'timestamp': '2025-09-30 23:08:28.248711', 'step': 4811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.317401', 'step': 4811, 'epoch': 2} {'type': 'loss', 'content': 0.005048082675784826, 'timestamp': '2025-09-30 23:08:28.327365', 'step': 4812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:28.395868', 'step': 4812, 'epoch': 2} {'type': 'loss', 'content': 0.009218277409672737, 'timestamp': '2025-09-30 23:08:28.402983', 'step': 4813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.466707', 'step': 4813, 'epoch': 2} {'type': 'loss', 'content': 0.008825989440083504, 'timestamp': '2025-09-30 23:08:28.470859', 'step': 4814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.532826', 'step': 4814, 'epoch': 2} {'type': 'loss', 'content': 0.019189568236470222, 'timestamp': '2025-09-30 23:08:28.537333', 'step': 4815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.596468', 'step': 4815, 'epoch': 2} {'type': 'loss', 'content': 0.0069051459431648254, 'timestamp': '2025-09-30 23:08:28.603653', 'step': 4816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.662021', 'step': 4816, 'epoch': 2} {'type': 'loss', 'content': 0.0486823208630085, 'timestamp': '2025-09-30 23:08:28.664889', 'step': 4817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.719399', 'step': 4817, 'epoch': 2} {'type': 'loss', 'content': 0.026585930958390236, 'timestamp': '2025-09-30 23:08:28.722606', 'step': 4818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:28.779475', 'step': 4818, 'epoch': 2} {'type': 'loss', 'content': 0.0038982066325843334, 'timestamp': '2025-09-30 23:08:28.782254', 'step': 4819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.838174', 'step': 4819, 'epoch': 2} {'type': 'loss', 'content': 0.013580295257270336, 'timestamp': '2025-09-30 23:08:28.844594', 'step': 4820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.898177', 'step': 4820, 'epoch': 2} {'type': 'loss', 'content': 0.0099329873919487, 'timestamp': '2025-09-30 23:08:28.900924', 'step': 4821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:28.955114', 'step': 4821, 'epoch': 2} {'type': 'loss', 'content': 0.00033815024653449655, 'timestamp': '2025-09-30 23:08:28.958957', 'step': 4822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:29.020397', 'step': 4822, 'epoch': 2} {'type': 'loss', 'content': 0.005322316195815802, 'timestamp': '2025-09-30 23:08:29.023722', 'step': 4823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.079611', 'step': 4823, 'epoch': 2} {'type': 'loss', 'content': 0.023193076252937317, 'timestamp': '2025-09-30 23:08:29.089746', 'step': 4824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.149537', 'step': 4824, 'epoch': 2} {'type': 'loss', 'content': 0.010102726519107819, 'timestamp': '2025-09-30 23:08:29.153075', 'step': 4825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.217881', 'step': 4825, 'epoch': 2} {'type': 'loss', 'content': 0.0012973938137292862, 'timestamp': '2025-09-30 23:08:29.224883', 'step': 4826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:29.294317', 'step': 4826, 'epoch': 2} {'type': 'loss', 'content': 0.025440791621804237, 'timestamp': '2025-09-30 23:08:29.301048', 'step': 4827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.373327', 'step': 4827, 'epoch': 2} {'type': 'loss', 'content': 0.03065255843102932, 'timestamp': '2025-09-30 23:08:29.384944', 'step': 4828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.478124', 'step': 4828, 'epoch': 2} {'type': 'loss', 'content': 0.007006573025137186, 'timestamp': '2025-09-30 23:08:29.482722', 'step': 4829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.538571', 'step': 4829, 'epoch': 2} {'type': 'loss', 'content': 0.012661519460380077, 'timestamp': '2025-09-30 23:08:29.541410', 'step': 4830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.597025', 'step': 4830, 'epoch': 2} {'type': 'loss', 'content': 0.004912558943033218, 'timestamp': '2025-09-30 23:08:29.601851', 'step': 4831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.674118', 'step': 4831, 'epoch': 2} {'type': 'loss', 'content': 0.015648825094103813, 'timestamp': '2025-09-30 23:08:29.680891', 'step': 4832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:29.740246', 'step': 4832, 'epoch': 2} {'type': 'loss', 'content': 0.004463537596166134, 'timestamp': '2025-09-30 23:08:29.747418', 'step': 4833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.818419', 'step': 4833, 'epoch': 2} {'type': 'loss', 'content': 0.023906392976641655, 'timestamp': '2025-09-30 23:08:29.825082', 'step': 4834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:29.887753', 'step': 4834, 'epoch': 2} {'type': 'loss', 'content': 0.014838166534900665, 'timestamp': '2025-09-30 23:08:29.891055', 'step': 4835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:29.959922', 'step': 4835, 'epoch': 2} {'type': 'loss', 'content': 0.027761679142713547, 'timestamp': '2025-09-30 23:08:29.971166', 'step': 4836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.035690', 'step': 4836, 'epoch': 2} {'type': 'loss', 'content': 0.014304270967841148, 'timestamp': '2025-09-30 23:08:30.043373', 'step': 4837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.110570', 'step': 4837, 'epoch': 2} {'type': 'loss', 'content': 0.002469074446707964, 'timestamp': '2025-09-30 23:08:30.119633', 'step': 4838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.194013', 'step': 4838, 'epoch': 2} {'type': 'loss', 'content': 0.005933239124715328, 'timestamp': '2025-09-30 23:08:30.197193', 'step': 4839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.262425', 'step': 4839, 'epoch': 2} {'type': 'loss', 'content': 0.0029364863876253366, 'timestamp': '2025-09-30 23:08:30.272183', 'step': 4840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:30.342026', 'step': 4840, 'epoch': 2} {'type': 'loss', 'content': 0.0334148183465004, 'timestamp': '2025-09-30 23:08:30.345773', 'step': 4841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.404400', 'step': 4841, 'epoch': 2} {'type': 'loss', 'content': 0.003300239099189639, 'timestamp': '2025-09-30 23:08:30.409591', 'step': 4842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.478782', 'step': 4842, 'epoch': 2} {'type': 'loss', 'content': 0.006213701795786619, 'timestamp': '2025-09-30 23:08:30.482501', 'step': 4843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.553803', 'step': 4843, 'epoch': 2} {'type': 'loss', 'content': 0.009862877428531647, 'timestamp': '2025-09-30 23:08:30.562407', 'step': 4844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.640350', 'step': 4844, 'epoch': 2} {'type': 'loss', 'content': 0.009407171979546547, 'timestamp': '2025-09-30 23:08:30.642737', 'step': 4845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.703413', 'step': 4845, 'epoch': 2} {'type': 'loss', 'content': 0.07663381099700928, 'timestamp': '2025-09-30 23:08:30.708032', 'step': 4846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.766363', 'step': 4846, 'epoch': 2} {'type': 'loss', 'content': 0.07244609296321869, 'timestamp': '2025-09-30 23:08:30.772277', 'step': 4847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.836883', 'step': 4847, 'epoch': 2} {'type': 'loss', 'content': 0.0038278442807495594, 'timestamp': '2025-09-30 23:08:30.849023', 'step': 4848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.913132', 'step': 4848, 'epoch': 2} {'type': 'loss', 'content': 0.0018031338695436716, 'timestamp': '2025-09-30 23:08:30.915758', 'step': 4849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:30.991021', 'step': 4849, 'epoch': 2} {'type': 'loss', 'content': 0.017287123948335648, 'timestamp': '2025-09-30 23:08:30.995072', 'step': 4850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.057842', 'step': 4850, 'epoch': 2} {'type': 'loss', 'content': 0.000993545982055366, 'timestamp': '2025-09-30 23:08:31.078070', 'step': 4851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.185148', 'step': 4851, 'epoch': 2} {'type': 'loss', 'content': 0.0018968440126627684, 'timestamp': '2025-09-30 23:08:31.197598', 'step': 4852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.288728', 'step': 4852, 'epoch': 2} {'type': 'loss', 'content': 0.000571125652641058, 'timestamp': '2025-09-30 23:08:31.296980', 'step': 4853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.354699', 'step': 4853, 'epoch': 2} {'type': 'loss', 'content': 0.004896629136055708, 'timestamp': '2025-09-30 23:08:31.357440', 'step': 4854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.413830', 'step': 4854, 'epoch': 2} {'type': 'loss', 'content': 0.006496170070022345, 'timestamp': '2025-09-30 23:08:31.416068', 'step': 4855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.473039', 'step': 4855, 'epoch': 2} {'type': 'loss', 'content': 0.05480268597602844, 'timestamp': '2025-09-30 23:08:31.481234', 'step': 4856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.534678', 'step': 4856, 'epoch': 2} {'type': 'loss', 'content': 0.022877272218465805, 'timestamp': '2025-09-30 23:08:31.537253', 'step': 4857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.590835', 'step': 4857, 'epoch': 2} {'type': 'loss', 'content': 0.006588050164282322, 'timestamp': '2025-09-30 23:08:31.593873', 'step': 4858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.653252', 'step': 4858, 'epoch': 2} {'type': 'loss', 'content': 0.01244402676820755, 'timestamp': '2025-09-30 23:08:31.655926', 'step': 4859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.709532', 'step': 4859, 'epoch': 2} {'type': 'loss', 'content': 0.004391537047922611, 'timestamp': '2025-09-30 23:08:31.716024', 'step': 4860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.768501', 'step': 4860, 'epoch': 2} {'type': 'loss', 'content': 0.02033570595085621, 'timestamp': '2025-09-30 23:08:31.771035', 'step': 4861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.826794', 'step': 4861, 'epoch': 2} {'type': 'loss', 'content': 0.00747100543230772, 'timestamp': '2025-09-30 23:08:31.830888', 'step': 4862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.887612', 'step': 4862, 'epoch': 2} {'type': 'loss', 'content': 0.001871910528279841, 'timestamp': '2025-09-30 23:08:31.891038', 'step': 4863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:31.944321', 'step': 4863, 'epoch': 2} {'type': 'loss', 'content': 0.03228536993265152, 'timestamp': '2025-09-30 23:08:31.952787', 'step': 4864, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:08:37.320988', 'step': 4864, 'epoch': 2} {'type': 'pplx', 'content': 6948666.827768454, 'timestamp': '2025-09-30 23:08:37.329856', 'step': 4864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:37.391188', 'step': 4864, 'epoch': 2} {'type': 'loss', 'content': 0.01683381013572216, 'timestamp': '2025-09-30 23:08:37.394533', 'step': 4865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:37.461809', 'step': 4865, 'epoch': 2} {'type': 'loss', 'content': 0.022570470348000526, 'timestamp': '2025-09-30 23:08:37.466508', 'step': 4866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:37.525469', 'step': 4866, 'epoch': 2} {'type': 'loss', 'content': 0.05348554626107216, 'timestamp': '2025-09-30 23:08:37.534950', 'step': 4867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:37.604581', 'step': 4867, 'epoch': 2} {'type': 'loss', 'content': 0.02161707915365696, 'timestamp': '2025-09-30 23:08:37.611082', 'step': 4868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:37.684086', 'step': 4868, 'epoch': 2} {'type': 'loss', 'content': 0.00904914177954197, 'timestamp': '2025-09-30 23:08:37.702711', 'step': 4869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:37.769447', 'step': 4869, 'epoch': 2} {'type': 'loss', 'content': 0.003182819113135338, 'timestamp': '2025-09-30 23:08:37.777357', 'step': 4870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:37.864970', 'step': 4870, 'epoch': 2} {'type': 'loss', 'content': 0.0032930481247603893, 'timestamp': '2025-09-30 23:08:37.868579', 'step': 4871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 80], 'flops': 400002507344.0}, 'timestamp': '2025-09-30 23:08:37.932829', 'step': 4871, 'epoch': 2} {'type': 'loss', 'content': 0.0008028350421227515, 'timestamp': '2025-09-30 23:08:37.945280', 'step': 4872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.011715', 'step': 4872, 'epoch': 3} {'type': 'loss', 'content': 0.02021261863410473, 'timestamp': '2025-09-30 23:08:38.020499', 'step': 4873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.090312', 'step': 4873, 'epoch': 3} {'type': 'loss', 'content': 0.02085105888545513, 'timestamp': '2025-09-30 23:08:38.097751', 'step': 4874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.172141', 'step': 4874, 'epoch': 3} {'type': 'loss', 'content': 0.013473227620124817, 'timestamp': '2025-09-30 23:08:38.181072', 'step': 4875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.255736', 'step': 4875, 'epoch': 3} {'type': 'loss', 'content': 0.016678592190146446, 'timestamp': '2025-09-30 23:08:38.270026', 'step': 4876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.338148', 'step': 4876, 'epoch': 3} {'type': 'loss', 'content': 0.019723910838365555, 'timestamp': '2025-09-30 23:08:38.348749', 'step': 4877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.420945', 'step': 4877, 'epoch': 3} {'type': 'loss', 'content': 0.0016867542872205377, 'timestamp': '2025-09-30 23:08:38.428953', 'step': 4878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.491023', 'step': 4878, 'epoch': 3} {'type': 'loss', 'content': 0.007209379691630602, 'timestamp': '2025-09-30 23:08:38.496019', 'step': 4879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.564428', 'step': 4879, 'epoch': 3} {'type': 'loss', 'content': 0.013359494507312775, 'timestamp': '2025-09-30 23:08:38.571221', 'step': 4880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:38.638051', 'step': 4880, 'epoch': 3} {'type': 'loss', 'content': 0.022181734442710876, 'timestamp': '2025-09-30 23:08:38.641064', 'step': 4881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:38.712604', 'step': 4881, 'epoch': 3} {'type': 'loss', 'content': 0.02756858989596367, 'timestamp': '2025-09-30 23:08:38.720953', 'step': 4882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.797010', 'step': 4882, 'epoch': 3} {'type': 'loss', 'content': 0.04661264270544052, 'timestamp': '2025-09-30 23:08:38.805380', 'step': 4883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:38.876711', 'step': 4883, 'epoch': 3} {'type': 'loss', 'content': 0.016302477568387985, 'timestamp': '2025-09-30 23:08:38.890977', 'step': 4884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:38.960178', 'step': 4884, 'epoch': 3} {'type': 'loss', 'content': 0.02656111679971218, 'timestamp': '2025-09-30 23:08:38.969227', 'step': 4885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.037194', 'step': 4885, 'epoch': 3} {'type': 'loss', 'content': 0.010208192281425, 'timestamp': '2025-09-30 23:08:39.045795', 'step': 4886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.121176', 'step': 4886, 'epoch': 3} {'type': 'loss', 'content': 0.010621530003845692, 'timestamp': '2025-09-30 23:08:39.130395', 'step': 4887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.208574', 'step': 4887, 'epoch': 3} {'type': 'loss', 'content': 0.016782348975539207, 'timestamp': '2025-09-30 23:08:39.217754', 'step': 4888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.291245', 'step': 4888, 'epoch': 3} {'type': 'loss', 'content': 0.01780809462070465, 'timestamp': '2025-09-30 23:08:39.296355', 'step': 4889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:39.360239', 'step': 4889, 'epoch': 3} {'type': 'loss', 'content': 0.0023758208844810724, 'timestamp': '2025-09-30 23:08:39.368436', 'step': 4890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.442313', 'step': 4890, 'epoch': 3} {'type': 'loss', 'content': 0.01646914705634117, 'timestamp': '2025-09-30 23:08:39.450613', 'step': 4891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.521000', 'step': 4891, 'epoch': 3} {'type': 'loss', 'content': 0.007969068363308907, 'timestamp': '2025-09-30 23:08:39.534805', 'step': 4892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:39.611553', 'step': 4892, 'epoch': 3} {'type': 'loss', 'content': 0.0038687370251864195, 'timestamp': '2025-09-30 23:08:39.618102', 'step': 4893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.692121', 'step': 4893, 'epoch': 3} {'type': 'loss', 'content': 0.010881677269935608, 'timestamp': '2025-09-30 23:08:39.697485', 'step': 4894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.764111', 'step': 4894, 'epoch': 3} {'type': 'loss', 'content': 0.02701645717024803, 'timestamp': '2025-09-30 23:08:39.769144', 'step': 4895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.840040', 'step': 4895, 'epoch': 3} {'type': 'loss', 'content': 0.0073217665776610374, 'timestamp': '2025-09-30 23:08:39.864744', 'step': 4896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:39.968999', 'step': 4896, 'epoch': 3} {'type': 'loss', 'content': 0.008311494253575802, 'timestamp': '2025-09-30 23:08:39.978175', 'step': 4897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.060815', 'step': 4897, 'epoch': 3} {'type': 'loss', 'content': 0.00474518071860075, 'timestamp': '2025-09-30 23:08:40.071711', 'step': 4898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:40.145847', 'step': 4898, 'epoch': 3} {'type': 'loss', 'content': 0.002375267678871751, 'timestamp': '2025-09-30 23:08:40.158221', 'step': 4899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.237697', 'step': 4899, 'epoch': 3} {'type': 'loss', 'content': 0.020327774807810783, 'timestamp': '2025-09-30 23:08:40.245947', 'step': 4900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.322148', 'step': 4900, 'epoch': 3} {'type': 'loss', 'content': 0.0002946353342849761, 'timestamp': '2025-09-30 23:08:40.328177', 'step': 4901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.421958', 'step': 4901, 'epoch': 3} {'type': 'loss', 'content': 0.01752174086868763, 'timestamp': '2025-09-30 23:08:40.446066', 'step': 4902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.559843', 'step': 4902, 'epoch': 3} {'type': 'loss', 'content': 0.008695443160831928, 'timestamp': '2025-09-30 23:08:40.565572', 'step': 4903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.631429', 'step': 4903, 'epoch': 3} {'type': 'loss', 'content': 0.0049902452155947685, 'timestamp': '2025-09-30 23:08:40.642327', 'step': 4904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.709138', 'step': 4904, 'epoch': 3} {'type': 'loss', 'content': 0.026715409010648727, 'timestamp': '2025-09-30 23:08:40.711878', 'step': 4905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:40.766716', 'step': 4905, 'epoch': 3} {'type': 'loss', 'content': 0.017818689346313477, 'timestamp': '2025-09-30 23:08:40.769532', 'step': 4906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.825266', 'step': 4906, 'epoch': 3} {'type': 'loss', 'content': 0.006837629713118076, 'timestamp': '2025-09-30 23:08:40.830491', 'step': 4907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.897369', 'step': 4907, 'epoch': 3} {'type': 'loss', 'content': 0.0003747040464077145, 'timestamp': '2025-09-30 23:08:40.903442', 'step': 4908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:40.957595', 'step': 4908, 'epoch': 3} {'type': 'loss', 'content': 0.0021282436791807413, 'timestamp': '2025-09-30 23:08:40.960408', 'step': 4909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.015010', 'step': 4909, 'epoch': 3} {'type': 'loss', 'content': 0.0023230293300002813, 'timestamp': '2025-09-30 23:08:41.017325', 'step': 4910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.072366', 'step': 4910, 'epoch': 3} {'type': 'loss', 'content': 0.0025504727382212877, 'timestamp': '2025-09-30 23:08:41.079213', 'step': 4911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.145246', 'step': 4911, 'epoch': 3} {'type': 'loss', 'content': 0.007515496574342251, 'timestamp': '2025-09-30 23:08:41.162572', 'step': 4912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.236876', 'step': 4912, 'epoch': 3} {'type': 'loss', 'content': 0.017937442287802696, 'timestamp': '2025-09-30 23:08:41.252352', 'step': 4913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:41.338255', 'step': 4913, 'epoch': 3} {'type': 'loss', 'content': 0.0013014905853196979, 'timestamp': '2025-09-30 23:08:41.350676', 'step': 4914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.415705', 'step': 4914, 'epoch': 3} {'type': 'loss', 'content': 0.0005822829552926123, 'timestamp': '2025-09-30 23:08:41.427729', 'step': 4915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.511796', 'step': 4915, 'epoch': 3} {'type': 'loss', 'content': 0.013369627296924591, 'timestamp': '2025-09-30 23:08:41.518646', 'step': 4916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.589960', 'step': 4916, 'epoch': 3} {'type': 'loss', 'content': 0.002919870661571622, 'timestamp': '2025-09-30 23:08:41.598868', 'step': 4917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:41.670186', 'step': 4917, 'epoch': 3} {'type': 'loss', 'content': 0.01499862875789404, 'timestamp': '2025-09-30 23:08:41.682252', 'step': 4918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.761600', 'step': 4918, 'epoch': 3} {'type': 'loss', 'content': 0.002071228576824069, 'timestamp': '2025-09-30 23:08:41.769581', 'step': 4919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.838994', 'step': 4919, 'epoch': 3} {'type': 'loss', 'content': 0.04199829697608948, 'timestamp': '2025-09-30 23:08:41.851236', 'step': 4920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:41.918052', 'step': 4920, 'epoch': 3} {'type': 'loss', 'content': 0.027416521683335304, 'timestamp': '2025-09-30 23:08:41.934032', 'step': 4921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:42.016719', 'step': 4921, 'epoch': 3} {'type': 'loss', 'content': 0.007848164066672325, 'timestamp': '2025-09-30 23:08:42.024638', 'step': 4922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:42.096083', 'step': 4922, 'epoch': 3} {'type': 'loss', 'content': 0.007039159536361694, 'timestamp': '2025-09-30 23:08:42.098502', 'step': 4923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:08:42.157118', 'step': 4923, 'epoch': 3} {'type': 'loss', 'content': 0.039315689355134964, 'timestamp': '2025-09-30 23:08:42.163855', 'step': 4924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:42.221822', 'step': 4924, 'epoch': 3} {'type': 'loss', 'content': 0.05877014249563217, 'timestamp': '2025-09-30 23:08:42.224984', 'step': 4925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:42.291624', 'step': 4925, 'epoch': 3} {'type': 'loss', 'content': 0.003495188197121024, 'timestamp': '2025-09-30 23:08:42.299701', 'step': 4926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:42.363204', 'step': 4926, 'epoch': 3} {'type': 'loss', 'content': 0.0017470278544351459, 'timestamp': '2025-09-30 23:08:42.369940', 'step': 4927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:42.431194', 'step': 4927, 'epoch': 3} {'type': 'loss', 'content': 0.006316289305686951, 'timestamp': '2025-09-30 23:08:42.441912', 'step': 4928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:42.509073', 'step': 4928, 'epoch': 3} {'type': 'loss', 'content': 0.0025951818097382784, 'timestamp': '2025-09-30 23:08:42.517019', 'step': 4929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:42.592468', 'step': 4929, 'epoch': 3} {'type': 'loss', 'content': 0.010419129393994808, 'timestamp': '2025-09-30 23:08:42.602700', 'step': 4930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:42.671911', 'step': 4930, 'epoch': 3} {'type': 'loss', 'content': 0.000257625913945958, 'timestamp': '2025-09-30 23:08:42.680591', 'step': 4931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:42.748126', 'step': 4931, 'epoch': 3} {'type': 'loss', 'content': 0.04885304719209671, 'timestamp': '2025-09-30 23:08:42.760473', 'step': 4932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:42.834845', 'step': 4932, 'epoch': 3} {'type': 'loss', 'content': 0.00141801405698061, 'timestamp': '2025-09-30 23:08:42.839339', 'step': 4933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:42.923738', 'step': 4933, 'epoch': 3} {'type': 'loss', 'content': 0.00979603175073862, 'timestamp': '2025-09-30 23:08:42.927806', 'step': 4934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:42.989900', 'step': 4934, 'epoch': 3} {'type': 'loss', 'content': 0.004592549055814743, 'timestamp': '2025-09-30 23:08:42.995878', 'step': 4935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.066021', 'step': 4935, 'epoch': 3} {'type': 'loss', 'content': 0.02970028854906559, 'timestamp': '2025-09-30 23:08:43.072934', 'step': 4936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.139808', 'step': 4936, 'epoch': 3} {'type': 'loss', 'content': 0.020201830193400383, 'timestamp': '2025-09-30 23:08:43.149320', 'step': 4937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.217367', 'step': 4937, 'epoch': 3} {'type': 'loss', 'content': 0.00191992218606174, 'timestamp': '2025-09-30 23:08:43.224713', 'step': 4938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.291774', 'step': 4938, 'epoch': 3} {'type': 'loss', 'content': 0.0013244269648566842, 'timestamp': '2025-09-30 23:08:43.297963', 'step': 4939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.372055', 'step': 4939, 'epoch': 3} {'type': 'loss', 'content': 0.007036369293928146, 'timestamp': '2025-09-30 23:08:43.378648', 'step': 4940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:43.438154', 'step': 4940, 'epoch': 3} {'type': 'loss', 'content': 0.01967201568186283, 'timestamp': '2025-09-30 23:08:43.444617', 'step': 4941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.513665', 'step': 4941, 'epoch': 3} {'type': 'loss', 'content': 0.00869003962725401, 'timestamp': '2025-09-30 23:08:43.523026', 'step': 4942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.596471', 'step': 4942, 'epoch': 3} {'type': 'loss', 'content': 0.006399799138307571, 'timestamp': '2025-09-30 23:08:43.606422', 'step': 4943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:43.679884', 'step': 4943, 'epoch': 3} {'type': 'loss', 'content': 0.0053665065206587315, 'timestamp': '2025-09-30 23:08:43.686909', 'step': 4944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.764698', 'step': 4944, 'epoch': 3} {'type': 'loss', 'content': 0.026364892721176147, 'timestamp': '2025-09-30 23:08:43.773389', 'step': 4945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:43.848141', 'step': 4945, 'epoch': 3} {'type': 'loss', 'content': 0.010020198300480843, 'timestamp': '2025-09-30 23:08:43.856705', 'step': 4946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:43.934599', 'step': 4946, 'epoch': 3} {'type': 'loss', 'content': 0.04178210720419884, 'timestamp': '2025-09-30 23:08:43.941329', 'step': 4947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.016713', 'step': 4947, 'epoch': 3} {'type': 'loss', 'content': 0.008335095830261707, 'timestamp': '2025-09-30 23:08:44.030252', 'step': 4948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.098543', 'step': 4948, 'epoch': 3} {'type': 'loss', 'content': 0.01951807551085949, 'timestamp': '2025-09-30 23:08:44.104729', 'step': 4949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.173446', 'step': 4949, 'epoch': 3} {'type': 'loss', 'content': 0.011709402315318584, 'timestamp': '2025-09-30 23:08:44.180935', 'step': 4950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:44.253282', 'step': 4950, 'epoch': 3} {'type': 'loss', 'content': 0.03857603296637535, 'timestamp': '2025-09-30 23:08:44.260921', 'step': 4951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.333228', 'step': 4951, 'epoch': 3} {'type': 'loss', 'content': 0.0013121935771778226, 'timestamp': '2025-09-30 23:08:44.339949', 'step': 4952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:08:44.412591', 'step': 4952, 'epoch': 3} {'type': 'loss', 'content': 0.0032465581316500902, 'timestamp': '2025-09-30 23:08:44.416383', 'step': 4953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.481739', 'step': 4953, 'epoch': 3} {'type': 'loss', 'content': 0.03163967281579971, 'timestamp': '2025-09-30 23:08:44.484153', 'step': 4954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.539134', 'step': 4954, 'epoch': 3} {'type': 'loss', 'content': 0.023523500189185143, 'timestamp': '2025-09-30 23:08:44.542028', 'step': 4955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.598136', 'step': 4955, 'epoch': 3} {'type': 'loss', 'content': 0.03803678974509239, 'timestamp': '2025-09-30 23:08:44.605660', 'step': 4956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.661239', 'step': 4956, 'epoch': 3} {'type': 'loss', 'content': 0.015359985642135143, 'timestamp': '2025-09-30 23:08:44.663507', 'step': 4957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.716592', 'step': 4957, 'epoch': 3} {'type': 'loss', 'content': 0.012218991294503212, 'timestamp': '2025-09-30 23:08:44.725963', 'step': 4958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.778992', 'step': 4958, 'epoch': 3} {'type': 'loss', 'content': 0.007369517348706722, 'timestamp': '2025-09-30 23:08:44.781430', 'step': 4959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.834741', 'step': 4959, 'epoch': 3} {'type': 'loss', 'content': 0.02771582081913948, 'timestamp': '2025-09-30 23:08:44.840643', 'step': 4960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:44.893236', 'step': 4960, 'epoch': 3} {'type': 'loss', 'content': 0.0017134143272414804, 'timestamp': '2025-09-30 23:08:44.895727', 'step': 4961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:44.948743', 'step': 4961, 'epoch': 3} {'type': 'loss', 'content': 0.013259624131023884, 'timestamp': '2025-09-30 23:08:44.951460', 'step': 4962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:45.004391', 'step': 4962, 'epoch': 3} {'type': 'loss', 'content': 0.0075651779770851135, 'timestamp': '2025-09-30 23:08:45.006873', 'step': 4963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.060043', 'step': 4963, 'epoch': 3} {'type': 'loss', 'content': 0.017769908532500267, 'timestamp': '2025-09-30 23:08:45.065680', 'step': 4964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.117638', 'step': 4964, 'epoch': 3} {'type': 'loss', 'content': 0.00948478002101183, 'timestamp': '2025-09-30 23:08:45.120104', 'step': 4965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.173613', 'step': 4965, 'epoch': 3} {'type': 'loss', 'content': 0.020222976803779602, 'timestamp': '2025-09-30 23:08:45.175893', 'step': 4966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.228399', 'step': 4966, 'epoch': 3} {'type': 'loss', 'content': 0.04596136137843132, 'timestamp': '2025-09-30 23:08:45.232129', 'step': 4967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.284747', 'step': 4967, 'epoch': 3} {'type': 'loss', 'content': 0.007631615269929171, 'timestamp': '2025-09-30 23:08:45.290392', 'step': 4968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:45.344000', 'step': 4968, 'epoch': 3} {'type': 'loss', 'content': 0.016402628272771835, 'timestamp': '2025-09-30 23:08:45.346287', 'step': 4969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.399357', 'step': 4969, 'epoch': 3} {'type': 'loss', 'content': 0.0007674781954847276, 'timestamp': '2025-09-30 23:08:45.401775', 'step': 4970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.454659', 'step': 4970, 'epoch': 3} {'type': 'loss', 'content': 0.008066775277256966, 'timestamp': '2025-09-30 23:08:45.456920', 'step': 4971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:45.509662', 'step': 4971, 'epoch': 3} {'type': 'loss', 'content': 0.007723704911768436, 'timestamp': '2025-09-30 23:08:45.517521', 'step': 4972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.573382', 'step': 4972, 'epoch': 3} {'type': 'loss', 'content': 0.03122839145362377, 'timestamp': '2025-09-30 23:08:45.575854', 'step': 4973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.629151', 'step': 4973, 'epoch': 3} {'type': 'loss', 'content': 0.02591048553586006, 'timestamp': '2025-09-30 23:08:45.631718', 'step': 4974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:45.685885', 'step': 4974, 'epoch': 3} {'type': 'loss', 'content': 0.0214559193700552, 'timestamp': '2025-09-30 23:08:45.688426', 'step': 4975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.741505', 'step': 4975, 'epoch': 3} {'type': 'loss', 'content': 0.004249344114214182, 'timestamp': '2025-09-30 23:08:45.747620', 'step': 4976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:45.800075', 'step': 4976, 'epoch': 3} {'type': 'loss', 'content': 0.0033284537494182587, 'timestamp': '2025-09-30 23:08:45.802887', 'step': 4977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.860969', 'step': 4977, 'epoch': 3} {'type': 'loss', 'content': 0.002124544233083725, 'timestamp': '2025-09-30 23:08:45.863178', 'step': 4978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.916072', 'step': 4978, 'epoch': 3} {'type': 'loss', 'content': 0.042818356305360794, 'timestamp': '2025-09-30 23:08:45.918729', 'step': 4979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:45.971910', 'step': 4979, 'epoch': 3} {'type': 'loss', 'content': 0.01554876659065485, 'timestamp': '2025-09-30 23:08:45.978245', 'step': 4980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.030486', 'step': 4980, 'epoch': 3} {'type': 'loss', 'content': 0.001457109465263784, 'timestamp': '2025-09-30 23:08:46.032814', 'step': 4981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.088388', 'step': 4981, 'epoch': 3} {'type': 'loss', 'content': 0.017644355073571205, 'timestamp': '2025-09-30 23:08:46.090554', 'step': 4982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:46.146823', 'step': 4982, 'epoch': 3} {'type': 'loss', 'content': 0.06418274343013763, 'timestamp': '2025-09-30 23:08:46.149391', 'step': 4983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.202423', 'step': 4983, 'epoch': 3} {'type': 'loss', 'content': 0.007118087261915207, 'timestamp': '2025-09-30 23:08:46.208176', 'step': 4984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.260184', 'step': 4984, 'epoch': 3} {'type': 'loss', 'content': 0.017787890508770943, 'timestamp': '2025-09-30 23:08:46.262440', 'step': 4985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:46.316465', 'step': 4985, 'epoch': 3} {'type': 'loss', 'content': 0.002534005092456937, 'timestamp': '2025-09-30 23:08:46.318721', 'step': 4986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.372733', 'step': 4986, 'epoch': 3} {'type': 'loss', 'content': 0.016927357763051987, 'timestamp': '2025-09-30 23:08:46.375084', 'step': 4987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.428296', 'step': 4987, 'epoch': 3} {'type': 'loss', 'content': 0.008461348712444305, 'timestamp': '2025-09-30 23:08:46.434526', 'step': 4988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:46.491787', 'step': 4988, 'epoch': 3} {'type': 'loss', 'content': 0.036642540246248245, 'timestamp': '2025-09-30 23:08:46.494400', 'step': 4989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.547551', 'step': 4989, 'epoch': 3} {'type': 'loss', 'content': 0.0014502510894089937, 'timestamp': '2025-09-30 23:08:46.551061', 'step': 4990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.615405', 'step': 4990, 'epoch': 3} {'type': 'loss', 'content': 0.0006876570987515152, 'timestamp': '2025-09-30 23:08:46.617714', 'step': 4991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:46.671088', 'step': 4991, 'epoch': 3} {'type': 'loss', 'content': 0.00456073647364974, 'timestamp': '2025-09-30 23:08:46.677299', 'step': 4992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.729931', 'step': 4992, 'epoch': 3} {'type': 'loss', 'content': 0.0005260144243948162, 'timestamp': '2025-09-30 23:08:46.732429', 'step': 4993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.785540', 'step': 4993, 'epoch': 3} {'type': 'loss', 'content': 0.0030244006775319576, 'timestamp': '2025-09-30 23:08:46.787784', 'step': 4994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.841095', 'step': 4994, 'epoch': 3} {'type': 'loss', 'content': 0.0011162416776642203, 'timestamp': '2025-09-30 23:08:46.843601', 'step': 4995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.897380', 'step': 4995, 'epoch': 3} {'type': 'loss', 'content': 0.0006523846532218158, 'timestamp': '2025-09-30 23:08:46.903293', 'step': 4996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:46.956432', 'step': 4996, 'epoch': 3} {'type': 'loss', 'content': 0.023075217381119728, 'timestamp': '2025-09-30 23:08:46.958908', 'step': 4997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:47.012102', 'step': 4997, 'epoch': 3} {'type': 'loss', 'content': 0.000952088856138289, 'timestamp': '2025-09-30 23:08:47.014396', 'step': 4998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:47.067151', 'step': 4998, 'epoch': 3} {'type': 'loss', 'content': 0.02717476524412632, 'timestamp': '2025-09-30 23:08:47.069464', 'step': 4999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:47.122878', 'step': 4999, 'epoch': 3} {'type': 'loss', 'content': 0.008517983369529247, 'timestamp': '2025-09-30 23:08:47.128672', 'step': 5000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-30 23:08:47.688846', 'step': 5000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:47.741174', 'step': 5000, 'epoch': 3} {'type': 'loss', 'content': 0.003934313543140888, 'timestamp': '2025-09-30 23:08:47.744112', 'step': 5001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:47.797615', 'step': 5001, 'epoch': 3} {'type': 'loss', 'content': 0.0020477287471294403, 'timestamp': '2025-09-30 23:08:47.799941', 'step': 5002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:47.852911', 'step': 5002, 'epoch': 3} {'type': 'loss', 'content': 0.012959075160324574, 'timestamp': '2025-09-30 23:08:47.856371', 'step': 5003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:47.910258', 'step': 5003, 'epoch': 3} {'type': 'loss', 'content': 0.006242082919925451, 'timestamp': '2025-09-30 23:08:47.916058', 'step': 5004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:47.968646', 'step': 5004, 'epoch': 3} {'type': 'loss', 'content': 0.004554987419396639, 'timestamp': '2025-09-30 23:08:47.970702', 'step': 5005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.024658', 'step': 5005, 'epoch': 3} {'type': 'loss', 'content': 0.00021698871569242328, 'timestamp': '2025-09-30 23:08:48.027444', 'step': 5006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.082267', 'step': 5006, 'epoch': 3} {'type': 'loss', 'content': 0.004922021180391312, 'timestamp': '2025-09-30 23:08:48.085586', 'step': 5007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.141077', 'step': 5007, 'epoch': 3} {'type': 'loss', 'content': 0.005496643017977476, 'timestamp': '2025-09-30 23:08:48.147285', 'step': 5008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:48.202707', 'step': 5008, 'epoch': 3} {'type': 'loss', 'content': 0.005057165864855051, 'timestamp': '2025-09-30 23:08:48.205594', 'step': 5009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.261152', 'step': 5009, 'epoch': 3} {'type': 'loss', 'content': 0.001633235951885581, 'timestamp': '2025-09-30 23:08:48.264484', 'step': 5010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:48.321583', 'step': 5010, 'epoch': 3} {'type': 'loss', 'content': 0.004290735814720392, 'timestamp': '2025-09-30 23:08:48.325049', 'step': 5011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.382809', 'step': 5011, 'epoch': 3} {'type': 'loss', 'content': 0.0035602599382400513, 'timestamp': '2025-09-30 23:08:48.389173', 'step': 5012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.449868', 'step': 5012, 'epoch': 3} {'type': 'loss', 'content': 0.0032812191639095545, 'timestamp': '2025-09-30 23:08:48.452893', 'step': 5013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.508157', 'step': 5013, 'epoch': 3} {'type': 'loss', 'content': 0.009251425042748451, 'timestamp': '2025-09-30 23:08:48.511346', 'step': 5014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.568375', 'step': 5014, 'epoch': 3} {'type': 'loss', 'content': 0.0007632990018464625, 'timestamp': '2025-09-30 23:08:48.572942', 'step': 5015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:48.631360', 'step': 5015, 'epoch': 3} {'type': 'loss', 'content': 0.01634385995566845, 'timestamp': '2025-09-30 23:08:48.637717', 'step': 5016, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:08:52.313638', 'step': 5016, 'epoch': 3} {'type': 'pplx', 'content': 9086697.15612057, 'timestamp': '2025-09-30 23:08:52.316048', 'step': 5016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.367659', 'step': 5016, 'epoch': 3} {'type': 'loss', 'content': 0.016672486439347267, 'timestamp': '2025-09-30 23:08:52.370137', 'step': 5017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.424275', 'step': 5017, 'epoch': 3} {'type': 'loss', 'content': 0.020889488980174065, 'timestamp': '2025-09-30 23:08:52.427784', 'step': 5018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.481237', 'step': 5018, 'epoch': 3} {'type': 'loss', 'content': 0.0007411330589093268, 'timestamp': '2025-09-30 23:08:52.483477', 'step': 5019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.537238', 'step': 5019, 'epoch': 3} {'type': 'loss', 'content': 0.00044166544103063643, 'timestamp': '2025-09-30 23:08:52.543876', 'step': 5020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.597349', 'step': 5020, 'epoch': 3} {'type': 'loss', 'content': 0.005554527044296265, 'timestamp': '2025-09-30 23:08:52.602736', 'step': 5021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.660428', 'step': 5021, 'epoch': 3} {'type': 'loss', 'content': 0.0017920739483088255, 'timestamp': '2025-09-30 23:08:52.662833', 'step': 5022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:52.717028', 'step': 5022, 'epoch': 3} {'type': 'loss', 'content': 0.006780731957405806, 'timestamp': '2025-09-30 23:08:52.719136', 'step': 5023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.771957', 'step': 5023, 'epoch': 3} {'type': 'loss', 'content': 0.005741015542298555, 'timestamp': '2025-09-30 23:08:52.777854', 'step': 5024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.829920', 'step': 5024, 'epoch': 3} {'type': 'loss', 'content': 0.023745562881231308, 'timestamp': '2025-09-30 23:08:52.833372', 'step': 5025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.885634', 'step': 5025, 'epoch': 3} {'type': 'loss', 'content': 0.004820062313228846, 'timestamp': '2025-09-30 23:08:52.889288', 'step': 5026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:52.942377', 'step': 5026, 'epoch': 3} {'type': 'loss', 'content': 0.0005781570798717439, 'timestamp': '2025-09-30 23:08:52.944942', 'step': 5027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:52.997759', 'step': 5027, 'epoch': 3} {'type': 'loss', 'content': 0.010972680523991585, 'timestamp': '2025-09-30 23:08:53.003722', 'step': 5028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:53.056218', 'step': 5028, 'epoch': 3} {'type': 'loss', 'content': 0.002776656299829483, 'timestamp': '2025-09-30 23:08:53.058708', 'step': 5029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:53.111751', 'step': 5029, 'epoch': 3} {'type': 'loss', 'content': 0.00018405374430585653, 'timestamp': '2025-09-30 23:08:53.114240', 'step': 5030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.166746', 'step': 5030, 'epoch': 3} {'type': 'loss', 'content': 0.0007050921558402479, 'timestamp': '2025-09-30 23:08:53.169185', 'step': 5031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.222431', 'step': 5031, 'epoch': 3} {'type': 'loss', 'content': 0.006804864387959242, 'timestamp': '2025-09-30 23:08:53.228171', 'step': 5032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.280979', 'step': 5032, 'epoch': 3} {'type': 'loss', 'content': 0.000728235871065408, 'timestamp': '2025-09-30 23:08:53.283136', 'step': 5033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.337944', 'step': 5033, 'epoch': 3} {'type': 'loss', 'content': 0.0033751954324543476, 'timestamp': '2025-09-30 23:08:53.340251', 'step': 5034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.393338', 'step': 5034, 'epoch': 3} {'type': 'loss', 'content': 0.0003952790575567633, 'timestamp': '2025-09-30 23:08:53.395627', 'step': 5035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.448546', 'step': 5035, 'epoch': 3} {'type': 'loss', 'content': 0.003947202116250992, 'timestamp': '2025-09-30 23:08:53.454377', 'step': 5036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:53.506692', 'step': 5036, 'epoch': 3} {'type': 'loss', 'content': 0.004196613561362028, 'timestamp': '2025-09-30 23:08:53.508958', 'step': 5037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:53.562713', 'step': 5037, 'epoch': 3} {'type': 'loss', 'content': 0.0021440708078444004, 'timestamp': '2025-09-30 23:08:53.566117', 'step': 5038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.622951', 'step': 5038, 'epoch': 3} {'type': 'loss', 'content': 0.01260889321565628, 'timestamp': '2025-09-30 23:08:53.629235', 'step': 5039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:53.685586', 'step': 5039, 'epoch': 3} {'type': 'loss', 'content': 0.0015298586804419756, 'timestamp': '2025-09-30 23:08:53.691555', 'step': 5040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:53.744534', 'step': 5040, 'epoch': 3} {'type': 'loss', 'content': 0.005452267825603485, 'timestamp': '2025-09-30 23:08:53.746931', 'step': 5041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.799509', 'step': 5041, 'epoch': 3} {'type': 'loss', 'content': 0.0024730588775128126, 'timestamp': '2025-09-30 23:08:53.802199', 'step': 5042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.854774', 'step': 5042, 'epoch': 3} {'type': 'loss', 'content': 0.004712558351457119, 'timestamp': '2025-09-30 23:08:53.857176', 'step': 5043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:53.910425', 'step': 5043, 'epoch': 3} {'type': 'loss', 'content': 0.001425561378709972, 'timestamp': '2025-09-30 23:08:53.916275', 'step': 5044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:53.968984', 'step': 5044, 'epoch': 3} {'type': 'loss', 'content': 0.0036512394435703754, 'timestamp': '2025-09-30 23:08:53.971912', 'step': 5045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.024907', 'step': 5045, 'epoch': 3} {'type': 'loss', 'content': 0.001991474535316229, 'timestamp': '2025-09-30 23:08:54.027783', 'step': 5046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.081782', 'step': 5046, 'epoch': 3} {'type': 'loss', 'content': 0.0020156563259661198, 'timestamp': '2025-09-30 23:08:54.084018', 'step': 5047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.136915', 'step': 5047, 'epoch': 3} {'type': 'loss', 'content': 0.00028748702607117593, 'timestamp': '2025-09-30 23:08:54.142663', 'step': 5048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.206093', 'step': 5048, 'epoch': 3} {'type': 'loss', 'content': 0.002260242123156786, 'timestamp': '2025-09-30 23:08:54.208477', 'step': 5049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.261542', 'step': 5049, 'epoch': 3} {'type': 'loss', 'content': 0.000181601892109029, 'timestamp': '2025-09-30 23:08:54.263703', 'step': 5050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.316466', 'step': 5050, 'epoch': 3} {'type': 'loss', 'content': 7.62554700486362e-05, 'timestamp': '2025-09-30 23:08:54.318869', 'step': 5051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.371973', 'step': 5051, 'epoch': 3} {'type': 'loss', 'content': 0.008279320783913136, 'timestamp': '2025-09-30 23:08:54.377908', 'step': 5052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:54.430393', 'step': 5052, 'epoch': 3} {'type': 'loss', 'content': 0.0005823842366226017, 'timestamp': '2025-09-30 23:08:54.432707', 'step': 5053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.486089', 'step': 5053, 'epoch': 3} {'type': 'loss', 'content': 0.0005160131258890033, 'timestamp': '2025-09-30 23:08:54.488464', 'step': 5054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.541913', 'step': 5054, 'epoch': 3} {'type': 'loss', 'content': 0.00016504987434018403, 'timestamp': '2025-09-30 23:08:54.544572', 'step': 5055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:54.597410', 'step': 5055, 'epoch': 3} {'type': 'loss', 'content': 0.000445272569777444, 'timestamp': '2025-09-30 23:08:54.605317', 'step': 5056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.661959', 'step': 5056, 'epoch': 3} {'type': 'loss', 'content': 0.003260988276451826, 'timestamp': '2025-09-30 23:08:54.664363', 'step': 5057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.718232', 'step': 5057, 'epoch': 3} {'type': 'loss', 'content': 0.0002730028936639428, 'timestamp': '2025-09-30 23:08:54.720848', 'step': 5058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.773740', 'step': 5058, 'epoch': 3} {'type': 'loss', 'content': 0.0007515741162933409, 'timestamp': '2025-09-30 23:08:54.776083', 'step': 5059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.829130', 'step': 5059, 'epoch': 3} {'type': 'loss', 'content': 0.0008370750583708286, 'timestamp': '2025-09-30 23:08:54.834887', 'step': 5060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.887712', 'step': 5060, 'epoch': 3} {'type': 'loss', 'content': 0.02522038295865059, 'timestamp': '2025-09-30 23:08:54.890458', 'step': 5061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:54.944126', 'step': 5061, 'epoch': 3} {'type': 'loss', 'content': 0.0004368432273622602, 'timestamp': '2025-09-30 23:08:54.947746', 'step': 5062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.000646', 'step': 5062, 'epoch': 3} {'type': 'loss', 'content': 0.006309441290795803, 'timestamp': '2025-09-30 23:08:55.003137', 'step': 5063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.056540', 'step': 5063, 'epoch': 3} {'type': 'loss', 'content': 3.9067766920197755e-05, 'timestamp': '2025-09-30 23:08:55.063756', 'step': 5064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.116015', 'step': 5064, 'epoch': 3} {'type': 'loss', 'content': 0.017432505264878273, 'timestamp': '2025-09-30 23:08:55.122005', 'step': 5065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.186475', 'step': 5065, 'epoch': 3} {'type': 'loss', 'content': 0.028323683887720108, 'timestamp': '2025-09-30 23:08:55.189797', 'step': 5066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.249240', 'step': 5066, 'epoch': 3} {'type': 'loss', 'content': 0.01313769817352295, 'timestamp': '2025-09-30 23:08:55.252260', 'step': 5067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.305555', 'step': 5067, 'epoch': 3} {'type': 'loss', 'content': 6.259014480747283e-05, 'timestamp': '2025-09-30 23:08:55.313211', 'step': 5068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:55.367644', 'step': 5068, 'epoch': 3} {'type': 'loss', 'content': 0.019047534093260765, 'timestamp': '2025-09-30 23:08:55.370072', 'step': 5069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.423286', 'step': 5069, 'epoch': 3} {'type': 'loss', 'content': 0.04612598195672035, 'timestamp': '2025-09-30 23:08:55.426067', 'step': 5070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.479704', 'step': 5070, 'epoch': 3} {'type': 'loss', 'content': 0.029755545780062675, 'timestamp': '2025-09-30 23:08:55.482224', 'step': 5071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.534782', 'step': 5071, 'epoch': 3} {'type': 'loss', 'content': 0.009958240203559399, 'timestamp': '2025-09-30 23:08:55.540609', 'step': 5072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.592827', 'step': 5072, 'epoch': 3} {'type': 'loss', 'content': 0.000262182584265247, 'timestamp': '2025-09-30 23:08:55.596810', 'step': 5073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:55.653245', 'step': 5073, 'epoch': 3} {'type': 'loss', 'content': 0.004017944913357496, 'timestamp': '2025-09-30 23:08:55.655802', 'step': 5074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.709192', 'step': 5074, 'epoch': 3} {'type': 'loss', 'content': 0.03965315222740173, 'timestamp': '2025-09-30 23:08:55.711496', 'step': 5075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.766920', 'step': 5075, 'epoch': 3} {'type': 'loss', 'content': 0.0011595668038353324, 'timestamp': '2025-09-30 23:08:55.774577', 'step': 5076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.830038', 'step': 5076, 'epoch': 3} {'type': 'loss', 'content': 0.0005079262191429734, 'timestamp': '2025-09-30 23:08:55.832875', 'step': 5077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.889661', 'step': 5077, 'epoch': 3} {'type': 'loss', 'content': 0.010549454018473625, 'timestamp': '2025-09-30 23:08:55.892647', 'step': 5078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:55.948809', 'step': 5078, 'epoch': 3} {'type': 'loss', 'content': 0.029016023501753807, 'timestamp': '2025-09-30 23:08:55.951819', 'step': 5079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.006148', 'step': 5079, 'epoch': 3} {'type': 'loss', 'content': 0.0016116801416501403, 'timestamp': '2025-09-30 23:08:56.013585', 'step': 5080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.068875', 'step': 5080, 'epoch': 3} {'type': 'loss', 'content': 0.00398417841643095, 'timestamp': '2025-09-30 23:08:56.073288', 'step': 5081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.128955', 'step': 5081, 'epoch': 3} {'type': 'loss', 'content': 0.0003230941656511277, 'timestamp': '2025-09-30 23:08:56.132012', 'step': 5082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.187470', 'step': 5082, 'epoch': 3} {'type': 'loss', 'content': 0.0026981832925230265, 'timestamp': '2025-09-30 23:08:56.190366', 'step': 5083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.245665', 'step': 5083, 'epoch': 3} {'type': 'loss', 'content': 0.002792022656649351, 'timestamp': '2025-09-30 23:08:56.252280', 'step': 5084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:56.306238', 'step': 5084, 'epoch': 3} {'type': 'loss', 'content': 0.0019698499236255884, 'timestamp': '2025-09-30 23:08:56.309405', 'step': 5085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.365376', 'step': 5085, 'epoch': 3} {'type': 'loss', 'content': 0.02021283470094204, 'timestamp': '2025-09-30 23:08:56.368324', 'step': 5086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:56.424906', 'step': 5086, 'epoch': 3} {'type': 'loss', 'content': 0.0038991335313767195, 'timestamp': '2025-09-30 23:08:56.428169', 'step': 5087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.486801', 'step': 5087, 'epoch': 3} {'type': 'loss', 'content': 0.0017989575862884521, 'timestamp': '2025-09-30 23:08:56.492840', 'step': 5088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.547739', 'step': 5088, 'epoch': 3} {'type': 'loss', 'content': 0.0004889015108346939, 'timestamp': '2025-09-30 23:08:56.550849', 'step': 5089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.605799', 'step': 5089, 'epoch': 3} {'type': 'loss', 'content': 0.00022814761905465275, 'timestamp': '2025-09-30 23:08:56.608864', 'step': 5090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.664595', 'step': 5090, 'epoch': 3} {'type': 'loss', 'content': 0.0005849851295351982, 'timestamp': '2025-09-30 23:08:56.667024', 'step': 5091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.720302', 'step': 5091, 'epoch': 3} {'type': 'loss', 'content': 0.09137614071369171, 'timestamp': '2025-09-30 23:08:56.725911', 'step': 5092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:56.778347', 'step': 5092, 'epoch': 3} {'type': 'loss', 'content': 0.006204289384186268, 'timestamp': '2025-09-30 23:08:56.781747', 'step': 5093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.834479', 'step': 5093, 'epoch': 3} {'type': 'loss', 'content': 6.219488568603992e-05, 'timestamp': '2025-09-30 23:08:56.836865', 'step': 5094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.890225', 'step': 5094, 'epoch': 3} {'type': 'loss', 'content': 0.004042924381792545, 'timestamp': '2025-09-30 23:08:56.894212', 'step': 5095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:56.947260', 'step': 5095, 'epoch': 3} {'type': 'loss', 'content': 0.04938614368438721, 'timestamp': '2025-09-30 23:08:56.953848', 'step': 5096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:57.005737', 'step': 5096, 'epoch': 3} {'type': 'loss', 'content': 0.09317176043987274, 'timestamp': '2025-09-30 23:08:57.008027', 'step': 5097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.060822', 'step': 5097, 'epoch': 3} {'type': 'loss', 'content': 0.002502530114725232, 'timestamp': '2025-09-30 23:08:57.063450', 'step': 5098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:57.116505', 'step': 5098, 'epoch': 3} {'type': 'loss', 'content': 0.02688024565577507, 'timestamp': '2025-09-30 23:08:57.119730', 'step': 5099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.172486', 'step': 5099, 'epoch': 3} {'type': 'loss', 'content': 0.004554957617074251, 'timestamp': '2025-09-30 23:08:57.178271', 'step': 5100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:57.231470', 'step': 5100, 'epoch': 3} {'type': 'loss', 'content': 0.0025389466900378466, 'timestamp': '2025-09-30 23:08:57.233986', 'step': 5101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.287050', 'step': 5101, 'epoch': 3} {'type': 'loss', 'content': 0.041141215711832047, 'timestamp': '2025-09-30 23:08:57.289679', 'step': 5102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.342804', 'step': 5102, 'epoch': 3} {'type': 'loss', 'content': 0.00011256539437454194, 'timestamp': '2025-09-30 23:08:57.347715', 'step': 5103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:57.400706', 'step': 5103, 'epoch': 3} {'type': 'loss', 'content': 0.0031140814535319805, 'timestamp': '2025-09-30 23:08:57.406699', 'step': 5104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.459378', 'step': 5104, 'epoch': 3} {'type': 'loss', 'content': 0.028157785534858704, 'timestamp': '2025-09-30 23:08:57.461879', 'step': 5105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.514663', 'step': 5105, 'epoch': 3} {'type': 'loss', 'content': 0.002663410734385252, 'timestamp': '2025-09-30 23:08:57.516953', 'step': 5106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:57.572844', 'step': 5106, 'epoch': 3} {'type': 'loss', 'content': 0.005624661687761545, 'timestamp': '2025-09-30 23:08:57.575050', 'step': 5107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.628529', 'step': 5107, 'epoch': 3} {'type': 'loss', 'content': 0.0010049331467598677, 'timestamp': '2025-09-30 23:08:57.634203', 'step': 5108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:57.687014', 'step': 5108, 'epoch': 3} {'type': 'loss', 'content': 0.002877222141250968, 'timestamp': '2025-09-30 23:08:57.689469', 'step': 5109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.743267', 'step': 5109, 'epoch': 3} {'type': 'loss', 'content': 0.0019549510907381773, 'timestamp': '2025-09-30 23:08:57.745812', 'step': 5110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.799345', 'step': 5110, 'epoch': 3} {'type': 'loss', 'content': 0.01694929040968418, 'timestamp': '2025-09-30 23:08:57.801727', 'step': 5111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.854861', 'step': 5111, 'epoch': 3} {'type': 'loss', 'content': 0.0008084354340098798, 'timestamp': '2025-09-30 23:08:57.860841', 'step': 5112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.913631', 'step': 5112, 'epoch': 3} {'type': 'loss', 'content': 0.00011559982522157952, 'timestamp': '2025-09-30 23:08:57.916115', 'step': 5113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:57.969891', 'step': 5113, 'epoch': 3} {'type': 'loss', 'content': 0.01738283410668373, 'timestamp': '2025-09-30 23:08:57.972486', 'step': 5114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:58.028813', 'step': 5114, 'epoch': 3} {'type': 'loss', 'content': 0.0024555984418839216, 'timestamp': '2025-09-30 23:08:58.033047', 'step': 5115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.088574', 'step': 5115, 'epoch': 3} {'type': 'loss', 'content': 0.01607268489897251, 'timestamp': '2025-09-30 23:08:58.094610', 'step': 5116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.147567', 'step': 5116, 'epoch': 3} {'type': 'loss', 'content': 0.025319883599877357, 'timestamp': '2025-09-30 23:08:58.150109', 'step': 5117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.203234', 'step': 5117, 'epoch': 3} {'type': 'loss', 'content': 0.009996123611927032, 'timestamp': '2025-09-30 23:08:58.205481', 'step': 5118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.259215', 'step': 5118, 'epoch': 3} {'type': 'loss', 'content': 0.003302133409306407, 'timestamp': '2025-09-30 23:08:58.262046', 'step': 5119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:58.314910', 'step': 5119, 'epoch': 3} {'type': 'loss', 'content': 0.02867814712226391, 'timestamp': '2025-09-30 23:08:58.320914', 'step': 5120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.374369', 'step': 5120, 'epoch': 3} {'type': 'loss', 'content': 0.02043714001774788, 'timestamp': '2025-09-30 23:08:58.376899', 'step': 5121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.429782', 'step': 5121, 'epoch': 3} {'type': 'loss', 'content': 0.0008826993289403617, 'timestamp': '2025-09-30 23:08:58.431781', 'step': 5122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:58.484799', 'step': 5122, 'epoch': 3} {'type': 'loss', 'content': 0.008031884208321571, 'timestamp': '2025-09-30 23:08:58.486949', 'step': 5123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.540569', 'step': 5123, 'epoch': 3} {'type': 'loss', 'content': 0.014781566336750984, 'timestamp': '2025-09-30 23:08:58.546381', 'step': 5124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:58.598994', 'step': 5124, 'epoch': 3} {'type': 'loss', 'content': 0.07544510811567307, 'timestamp': '2025-09-30 23:08:58.601388', 'step': 5125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.657981', 'step': 5125, 'epoch': 3} {'type': 'loss', 'content': 0.0007193089695647359, 'timestamp': '2025-09-30 23:08:58.660441', 'step': 5126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.713748', 'step': 5126, 'epoch': 3} {'type': 'loss', 'content': 0.008882918395102024, 'timestamp': '2025-09-30 23:08:58.716214', 'step': 5127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.770420', 'step': 5127, 'epoch': 3} {'type': 'loss', 'content': 0.0036191302351653576, 'timestamp': '2025-09-30 23:08:58.776471', 'step': 5128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:58.829208', 'step': 5128, 'epoch': 3} {'type': 'loss', 'content': 0.005000484641641378, 'timestamp': '2025-09-30 23:08:58.831100', 'step': 5129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.884015', 'step': 5129, 'epoch': 3} {'type': 'loss', 'content': 0.0010596716310828924, 'timestamp': '2025-09-30 23:08:58.887304', 'step': 5130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.942416', 'step': 5130, 'epoch': 3} {'type': 'loss', 'content': 0.009277482517063618, 'timestamp': '2025-09-30 23:08:58.944863', 'step': 5131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:58.997866', 'step': 5131, 'epoch': 3} {'type': 'loss', 'content': 0.032187119126319885, 'timestamp': '2025-09-30 23:08:59.003492', 'step': 5132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.057540', 'step': 5132, 'epoch': 3} {'type': 'loss', 'content': 0.016485992819070816, 'timestamp': '2025-09-30 23:08:59.059841', 'step': 5133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.117299', 'step': 5133, 'epoch': 3} {'type': 'loss', 'content': 0.007839455269277096, 'timestamp': '2025-09-30 23:08:59.119779', 'step': 5134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.173544', 'step': 5134, 'epoch': 3} {'type': 'loss', 'content': 0.03929907828569412, 'timestamp': '2025-09-30 23:08:59.176137', 'step': 5135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.230798', 'step': 5135, 'epoch': 3} {'type': 'loss', 'content': 0.006246368400752544, 'timestamp': '2025-09-30 23:08:59.236381', 'step': 5136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.288923', 'step': 5136, 'epoch': 3} {'type': 'loss', 'content': 0.0038458441849797964, 'timestamp': '2025-09-30 23:08:59.293048', 'step': 5137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:59.347355', 'step': 5137, 'epoch': 3} {'type': 'loss', 'content': 0.0013179249363020062, 'timestamp': '2025-09-30 23:08:59.349824', 'step': 5138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.403125', 'step': 5138, 'epoch': 3} {'type': 'loss', 'content': 0.00221323617734015, 'timestamp': '2025-09-30 23:08:59.405508', 'step': 5139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.465401', 'step': 5139, 'epoch': 3} {'type': 'loss', 'content': 0.002269770484417677, 'timestamp': '2025-09-30 23:08:59.471260', 'step': 5140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.523757', 'step': 5140, 'epoch': 3} {'type': 'loss', 'content': 0.010348697192966938, 'timestamp': '2025-09-30 23:08:59.525980', 'step': 5141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.578974', 'step': 5141, 'epoch': 3} {'type': 'loss', 'content': 0.01154576987028122, 'timestamp': '2025-09-30 23:08:59.581293', 'step': 5142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.635107', 'step': 5142, 'epoch': 3} {'type': 'loss', 'content': 0.018505459651350975, 'timestamp': '2025-09-30 23:08:59.637703', 'step': 5143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.690662', 'step': 5143, 'epoch': 3} {'type': 'loss', 'content': 0.0016415921272709966, 'timestamp': '2025-09-30 23:08:59.696499', 'step': 5144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:08:59.758676', 'step': 5144, 'epoch': 3} {'type': 'loss', 'content': 0.0019249318866059184, 'timestamp': '2025-09-30 23:08:59.761115', 'step': 5145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.814702', 'step': 5145, 'epoch': 3} {'type': 'loss', 'content': 0.013722493313252926, 'timestamp': '2025-09-30 23:08:59.817236', 'step': 5146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:08:59.877943', 'step': 5146, 'epoch': 3} {'type': 'loss', 'content': 0.049711938947439194, 'timestamp': '2025-09-30 23:08:59.880917', 'step': 5147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:59.934884', 'step': 5147, 'epoch': 3} {'type': 'loss', 'content': 0.0039957803674042225, 'timestamp': '2025-09-30 23:08:59.941073', 'step': 5148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:08:59.997883', 'step': 5148, 'epoch': 3} {'type': 'loss', 'content': 0.036944326013326645, 'timestamp': '2025-09-30 23:09:00.003765', 'step': 5149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.057401', 'step': 5149, 'epoch': 3} {'type': 'loss', 'content': 0.02747860550880432, 'timestamp': '2025-09-30 23:09:00.059827', 'step': 5150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.113025', 'step': 5150, 'epoch': 3} {'type': 'loss', 'content': 0.0477561354637146, 'timestamp': '2025-09-30 23:09:00.115456', 'step': 5151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:00.168998', 'step': 5151, 'epoch': 3} {'type': 'loss', 'content': 0.005925637669861317, 'timestamp': '2025-09-30 23:09:00.174899', 'step': 5152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.227378', 'step': 5152, 'epoch': 3} {'type': 'loss', 'content': 0.0013082133373245597, 'timestamp': '2025-09-30 23:09:00.229462', 'step': 5153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.282343', 'step': 5153, 'epoch': 3} {'type': 'loss', 'content': 0.05324948951601982, 'timestamp': '2025-09-30 23:09:00.286038', 'step': 5154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.341156', 'step': 5154, 'epoch': 3} {'type': 'loss', 'content': 0.02100289799273014, 'timestamp': '2025-09-30 23:09:00.343450', 'step': 5155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.396739', 'step': 5155, 'epoch': 3} {'type': 'loss', 'content': 0.01652793399989605, 'timestamp': '2025-09-30 23:09:00.402600', 'step': 5156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:00.455527', 'step': 5156, 'epoch': 3} {'type': 'loss', 'content': 0.00543561577796936, 'timestamp': '2025-09-30 23:09:00.457953', 'step': 5157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.511027', 'step': 5157, 'epoch': 3} {'type': 'loss', 'content': 0.002090069931000471, 'timestamp': '2025-09-30 23:09:00.513335', 'step': 5158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.567159', 'step': 5158, 'epoch': 3} {'type': 'loss', 'content': 0.0009650463471189141, 'timestamp': '2025-09-30 23:09:00.569664', 'step': 5159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.622683', 'step': 5159, 'epoch': 3} {'type': 'loss', 'content': 0.010653341189026833, 'timestamp': '2025-09-30 23:09:00.628157', 'step': 5160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.681891', 'step': 5160, 'epoch': 3} {'type': 'loss', 'content': 0.02403322234749794, 'timestamp': '2025-09-30 23:09:00.684244', 'step': 5161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.737675', 'step': 5161, 'epoch': 3} {'type': 'loss', 'content': 0.01591474935412407, 'timestamp': '2025-09-30 23:09:00.740829', 'step': 5162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.797564', 'step': 5162, 'epoch': 3} {'type': 'loss', 'content': 0.0031649828888475895, 'timestamp': '2025-09-30 23:09:00.799962', 'step': 5163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.853151', 'step': 5163, 'epoch': 3} {'type': 'loss', 'content': 0.006554530002176762, 'timestamp': '2025-09-30 23:09:00.859176', 'step': 5164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.912048', 'step': 5164, 'epoch': 3} {'type': 'loss', 'content': 0.006227429490536451, 'timestamp': '2025-09-30 23:09:00.914363', 'step': 5165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:00.967688', 'step': 5165, 'epoch': 3} {'type': 'loss', 'content': 0.06724613159894943, 'timestamp': '2025-09-30 23:09:00.970141', 'step': 5166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:01.024374', 'step': 5166, 'epoch': 3} {'type': 'loss', 'content': 0.020584987476468086, 'timestamp': '2025-09-30 23:09:01.026445', 'step': 5167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:01.079200', 'step': 5167, 'epoch': 3} {'type': 'loss', 'content': 0.011553158052265644, 'timestamp': '2025-09-30 23:09:01.085141', 'step': 5168, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:09:04.637038', 'step': 5168, 'epoch': 3} {'type': 'pplx', 'content': 8617629.514827041, 'timestamp': '2025-09-30 23:09:04.639351', 'step': 5168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:04.690332', 'step': 5168, 'epoch': 3} {'type': 'loss', 'content': 0.003659443696960807, 'timestamp': '2025-09-30 23:09:04.692558', 'step': 5169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:04.756294', 'step': 5169, 'epoch': 3} {'type': 'loss', 'content': 0.009120610542595387, 'timestamp': '2025-09-30 23:09:04.760892', 'step': 5170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:04.814952', 'step': 5170, 'epoch': 3} {'type': 'loss', 'content': 0.0020543003920465708, 'timestamp': '2025-09-30 23:09:04.817274', 'step': 5171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:04.873733', 'step': 5171, 'epoch': 3} {'type': 'loss', 'content': 0.004674999974668026, 'timestamp': '2025-09-30 23:09:04.879671', 'step': 5172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:04.938940', 'step': 5172, 'epoch': 3} {'type': 'loss', 'content': 0.002571872202679515, 'timestamp': '2025-09-30 23:09:04.940833', 'step': 5173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:04.994063', 'step': 5173, 'epoch': 3} {'type': 'loss', 'content': 0.005850149784237146, 'timestamp': '2025-09-30 23:09:04.996467', 'step': 5174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.049646', 'step': 5174, 'epoch': 3} {'type': 'loss', 'content': 0.018444916233420372, 'timestamp': '2025-09-30 23:09:05.051879', 'step': 5175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.105142', 'step': 5175, 'epoch': 3} {'type': 'loss', 'content': 0.0011005207197740674, 'timestamp': '2025-09-30 23:09:05.110996', 'step': 5176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.163701', 'step': 5176, 'epoch': 3} {'type': 'loss', 'content': 0.01822640560567379, 'timestamp': '2025-09-30 23:09:05.166317', 'step': 5177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.219563', 'step': 5177, 'epoch': 3} {'type': 'loss', 'content': 0.010191796347498894, 'timestamp': '2025-09-30 23:09:05.222176', 'step': 5178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.276012', 'step': 5178, 'epoch': 3} {'type': 'loss', 'content': 0.0007526357076130807, 'timestamp': '2025-09-30 23:09:05.278717', 'step': 5179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.331492', 'step': 5179, 'epoch': 3} {'type': 'loss', 'content': 0.0013807142386212945, 'timestamp': '2025-09-30 23:09:05.337277', 'step': 5180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:05.389246', 'step': 5180, 'epoch': 3} {'type': 'loss', 'content': 0.009606355801224709, 'timestamp': '2025-09-30 23:09:05.392257', 'step': 5181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.445203', 'step': 5181, 'epoch': 3} {'type': 'loss', 'content': 0.009664413519203663, 'timestamp': '2025-09-30 23:09:05.446974', 'step': 5182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:05.499235', 'step': 5182, 'epoch': 3} {'type': 'loss', 'content': 0.034669630229473114, 'timestamp': '2025-09-30 23:09:05.501881', 'step': 5183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.554491', 'step': 5183, 'epoch': 3} {'type': 'loss', 'content': 0.0013106790138408542, 'timestamp': '2025-09-30 23:09:05.560386', 'step': 5184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.613086', 'step': 5184, 'epoch': 3} {'type': 'loss', 'content': 0.014997689984738827, 'timestamp': '2025-09-30 23:09:05.615037', 'step': 5185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:05.667750', 'step': 5185, 'epoch': 3} {'type': 'loss', 'content': 0.0032316073775291443, 'timestamp': '2025-09-30 23:09:05.670297', 'step': 5186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.723412', 'step': 5186, 'epoch': 3} {'type': 'loss', 'content': 0.005894364323467016, 'timestamp': '2025-09-30 23:09:05.725696', 'step': 5187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.778557', 'step': 5187, 'epoch': 3} {'type': 'loss', 'content': 0.004372005350887775, 'timestamp': '2025-09-30 23:09:05.783880', 'step': 5188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:05.836328', 'step': 5188, 'epoch': 3} {'type': 'loss', 'content': 0.006517514120787382, 'timestamp': '2025-09-30 23:09:05.838241', 'step': 5189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.891536', 'step': 5189, 'epoch': 3} {'type': 'loss', 'content': 0.012599033303558826, 'timestamp': '2025-09-30 23:09:05.893811', 'step': 5190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:05.949901', 'step': 5190, 'epoch': 3} {'type': 'loss', 'content': 0.00021649431437253952, 'timestamp': '2025-09-30 23:09:05.954701', 'step': 5191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.007518', 'step': 5191, 'epoch': 3} {'type': 'loss', 'content': 0.00433066301047802, 'timestamp': '2025-09-30 23:09:06.013202', 'step': 5192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.065332', 'step': 5192, 'epoch': 3} {'type': 'loss', 'content': 0.0036681077908724546, 'timestamp': '2025-09-30 23:09:06.067612', 'step': 5193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:06.121219', 'step': 5193, 'epoch': 3} {'type': 'loss', 'content': 0.009885953739285469, 'timestamp': '2025-09-30 23:09:06.123500', 'step': 5194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:06.176184', 'step': 5194, 'epoch': 3} {'type': 'loss', 'content': 0.0013693890068680048, 'timestamp': '2025-09-30 23:09:06.179188', 'step': 5195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.234970', 'step': 5195, 'epoch': 3} {'type': 'loss', 'content': 0.008188913576304913, 'timestamp': '2025-09-30 23:09:06.240808', 'step': 5196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:06.292754', 'step': 5196, 'epoch': 3} {'type': 'loss', 'content': 0.01676313765347004, 'timestamp': '2025-09-30 23:09:06.294545', 'step': 5197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.347483', 'step': 5197, 'epoch': 3} {'type': 'loss', 'content': 0.057730503380298615, 'timestamp': '2025-09-30 23:09:06.349389', 'step': 5198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.402846', 'step': 5198, 'epoch': 3} {'type': 'loss', 'content': 0.011301390826702118, 'timestamp': '2025-09-30 23:09:06.406219', 'step': 5199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.467367', 'step': 5199, 'epoch': 3} {'type': 'loss', 'content': 0.001958026085048914, 'timestamp': '2025-09-30 23:09:06.475924', 'step': 5200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.533270', 'step': 5200, 'epoch': 3} {'type': 'loss', 'content': 0.02219623327255249, 'timestamp': '2025-09-30 23:09:06.536379', 'step': 5201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.588648', 'step': 5201, 'epoch': 3} {'type': 'loss', 'content': 0.01451996061950922, 'timestamp': '2025-09-30 23:09:06.591470', 'step': 5202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.643919', 'step': 5202, 'epoch': 3} {'type': 'loss', 'content': 0.0010844680946320295, 'timestamp': '2025-09-30 23:09:06.645841', 'step': 5203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:06.697945', 'step': 5203, 'epoch': 3} {'type': 'loss', 'content': 0.004140745382755995, 'timestamp': '2025-09-30 23:09:06.703920', 'step': 5204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.756528', 'step': 5204, 'epoch': 3} {'type': 'loss', 'content': 0.004992220550775528, 'timestamp': '2025-09-30 23:09:06.758402', 'step': 5205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.816145', 'step': 5205, 'epoch': 3} {'type': 'loss', 'content': 0.010486133396625519, 'timestamp': '2025-09-30 23:09:06.818036', 'step': 5206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.870909', 'step': 5206, 'epoch': 3} {'type': 'loss', 'content': 0.011387413367629051, 'timestamp': '2025-09-30 23:09:06.873510', 'step': 5207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.927679', 'step': 5207, 'epoch': 3} {'type': 'loss', 'content': 0.00848620105534792, 'timestamp': '2025-09-30 23:09:06.934384', 'step': 5208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:06.987364', 'step': 5208, 'epoch': 3} {'type': 'loss', 'content': 0.027583912014961243, 'timestamp': '2025-09-30 23:09:06.989589', 'step': 5209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.046813', 'step': 5209, 'epoch': 3} {'type': 'loss', 'content': 0.044940102845430374, 'timestamp': '2025-09-30 23:09:07.049420', 'step': 5210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.102699', 'step': 5210, 'epoch': 3} {'type': 'loss', 'content': 0.020082805305719376, 'timestamp': '2025-09-30 23:09:07.105086', 'step': 5211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:07.161107', 'step': 5211, 'epoch': 3} {'type': 'loss', 'content': 0.0030477759428322315, 'timestamp': '2025-09-30 23:09:07.166840', 'step': 5212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.220181', 'step': 5212, 'epoch': 3} {'type': 'loss', 'content': 0.00018705411639530212, 'timestamp': '2025-09-30 23:09:07.222017', 'step': 5213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.274292', 'step': 5213, 'epoch': 3} {'type': 'loss', 'content': 0.003298074472695589, 'timestamp': '2025-09-30 23:09:07.276070', 'step': 5214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.336747', 'step': 5214, 'epoch': 3} {'type': 'loss', 'content': 0.0072654783725738525, 'timestamp': '2025-09-30 23:09:07.338865', 'step': 5215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.391809', 'step': 5215, 'epoch': 3} {'type': 'loss', 'content': 0.014095458202064037, 'timestamp': '2025-09-30 23:09:07.397624', 'step': 5216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:07.450009', 'step': 5216, 'epoch': 3} {'type': 'loss', 'content': 0.0023015073966234922, 'timestamp': '2025-09-30 23:09:07.452171', 'step': 5217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.505006', 'step': 5217, 'epoch': 3} {'type': 'loss', 'content': 0.004321974702179432, 'timestamp': '2025-09-30 23:09:07.508114', 'step': 5218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.571931', 'step': 5218, 'epoch': 3} {'type': 'loss', 'content': 0.010727360844612122, 'timestamp': '2025-09-30 23:09:07.574658', 'step': 5219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.628077', 'step': 5219, 'epoch': 3} {'type': 'loss', 'content': 0.004135883878916502, 'timestamp': '2025-09-30 23:09:07.633826', 'step': 5220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.688829', 'step': 5220, 'epoch': 3} {'type': 'loss', 'content': 0.0010084891691803932, 'timestamp': '2025-09-30 23:09:07.692416', 'step': 5221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.749623', 'step': 5221, 'epoch': 3} {'type': 'loss', 'content': 0.0247519351541996, 'timestamp': '2025-09-30 23:09:07.751856', 'step': 5222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.807461', 'step': 5222, 'epoch': 3} {'type': 'loss', 'content': 0.0015251879813149571, 'timestamp': '2025-09-30 23:09:07.816945', 'step': 5223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.875267', 'step': 5223, 'epoch': 3} {'type': 'loss', 'content': 0.00812580343335867, 'timestamp': '2025-09-30 23:09:07.882810', 'step': 5224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:07.940580', 'step': 5224, 'epoch': 3} {'type': 'loss', 'content': 0.018507758155465126, 'timestamp': '2025-09-30 23:09:07.945636', 'step': 5225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.005617', 'step': 5225, 'epoch': 3} {'type': 'loss', 'content': 0.0008152135997079313, 'timestamp': '2025-09-30 23:09:08.010342', 'step': 5226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:08.074628', 'step': 5226, 'epoch': 3} {'type': 'loss', 'content': 0.008410884998738766, 'timestamp': '2025-09-30 23:09:08.078908', 'step': 5227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.136497', 'step': 5227, 'epoch': 3} {'type': 'loss', 'content': 0.007262858096510172, 'timestamp': '2025-09-30 23:09:08.143160', 'step': 5228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.198921', 'step': 5228, 'epoch': 3} {'type': 'loss', 'content': 0.00727204279974103, 'timestamp': '2025-09-30 23:09:08.204987', 'step': 5229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.264893', 'step': 5229, 'epoch': 3} {'type': 'loss', 'content': 0.013495393097400665, 'timestamp': '2025-09-30 23:09:08.270464', 'step': 5230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.332118', 'step': 5230, 'epoch': 3} {'type': 'loss', 'content': 0.001971115358173847, 'timestamp': '2025-09-30 23:09:08.335695', 'step': 5231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.402896', 'step': 5231, 'epoch': 3} {'type': 'loss', 'content': 0.0006537241861224174, 'timestamp': '2025-09-30 23:09:08.410523', 'step': 5232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.468218', 'step': 5232, 'epoch': 3} {'type': 'loss', 'content': 8.699121826793998e-05, 'timestamp': '2025-09-30 23:09:08.472512', 'step': 5233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.529231', 'step': 5233, 'epoch': 3} {'type': 'loss', 'content': 0.004775789566338062, 'timestamp': '2025-09-30 23:09:08.534019', 'step': 5234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:08.592115', 'step': 5234, 'epoch': 3} {'type': 'loss', 'content': 0.00012628818512894213, 'timestamp': '2025-09-30 23:09:08.595461', 'step': 5235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.651645', 'step': 5235, 'epoch': 3} {'type': 'loss', 'content': 0.006891563534736633, 'timestamp': '2025-09-30 23:09:08.660201', 'step': 5236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.719584', 'step': 5236, 'epoch': 3} {'type': 'loss', 'content': 0.001714274869300425, 'timestamp': '2025-09-30 23:09:08.724402', 'step': 5237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.793898', 'step': 5237, 'epoch': 3} {'type': 'loss', 'content': 0.022769030183553696, 'timestamp': '2025-09-30 23:09:08.799738', 'step': 5238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.859912', 'step': 5238, 'epoch': 3} {'type': 'loss', 'content': 0.003553158836439252, 'timestamp': '2025-09-30 23:09:08.866892', 'step': 5239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:08.925621', 'step': 5239, 'epoch': 3} {'type': 'loss', 'content': 0.0007938022608868778, 'timestamp': '2025-09-30 23:09:08.931532', 'step': 5240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:08.995097', 'step': 5240, 'epoch': 3} {'type': 'loss', 'content': 0.013929630629718304, 'timestamp': '2025-09-30 23:09:08.997384', 'step': 5241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.055141', 'step': 5241, 'epoch': 3} {'type': 'loss', 'content': 0.0032192051876336336, 'timestamp': '2025-09-30 23:09:09.059563', 'step': 5242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.118920', 'step': 5242, 'epoch': 3} {'type': 'loss', 'content': 0.03437340632081032, 'timestamp': '2025-09-30 23:09:09.122090', 'step': 5243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.180716', 'step': 5243, 'epoch': 3} {'type': 'loss', 'content': 0.0009642824297770858, 'timestamp': '2025-09-30 23:09:09.189409', 'step': 5244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.247044', 'step': 5244, 'epoch': 3} {'type': 'loss', 'content': 0.00012679488281719387, 'timestamp': '2025-09-30 23:09:09.250237', 'step': 5245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.312480', 'step': 5245, 'epoch': 3} {'type': 'loss', 'content': 0.0004661930142901838, 'timestamp': '2025-09-30 23:09:09.314880', 'step': 5246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.368193', 'step': 5246, 'epoch': 3} {'type': 'loss', 'content': 0.0010547253768891096, 'timestamp': '2025-09-30 23:09:09.370889', 'step': 5247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.424474', 'step': 5247, 'epoch': 3} {'type': 'loss', 'content': 0.004346855450421572, 'timestamp': '2025-09-30 23:09:09.430551', 'step': 5248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.483626', 'step': 5248, 'epoch': 3} {'type': 'loss', 'content': 0.00026957347290590405, 'timestamp': '2025-09-30 23:09:09.486545', 'step': 5249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.539701', 'step': 5249, 'epoch': 3} {'type': 'loss', 'content': 0.002692902460694313, 'timestamp': '2025-09-30 23:09:09.542087', 'step': 5250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.596422', 'step': 5250, 'epoch': 3} {'type': 'loss', 'content': 0.00021354247292038053, 'timestamp': '2025-09-30 23:09:09.598982', 'step': 5251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.652417', 'step': 5251, 'epoch': 3} {'type': 'loss', 'content': 0.011189057491719723, 'timestamp': '2025-09-30 23:09:09.658393', 'step': 5252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.710584', 'step': 5252, 'epoch': 3} {'type': 'loss', 'content': 0.00044830227852799, 'timestamp': '2025-09-30 23:09:09.714219', 'step': 5253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.771387', 'step': 5253, 'epoch': 3} {'type': 'loss', 'content': 0.017170170322060585, 'timestamp': '2025-09-30 23:09:09.773547', 'step': 5254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.826446', 'step': 5254, 'epoch': 3} {'type': 'loss', 'content': 0.00032259029103443027, 'timestamp': '2025-09-30 23:09:09.828729', 'step': 5255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.882175', 'step': 5255, 'epoch': 3} {'type': 'loss', 'content': 3.943900446756743e-05, 'timestamp': '2025-09-30 23:09:09.888096', 'step': 5256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:09.940608', 'step': 5256, 'epoch': 3} {'type': 'loss', 'content': 0.010860021226108074, 'timestamp': '2025-09-30 23:09:09.942827', 'step': 5257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:09.995986', 'step': 5257, 'epoch': 3} {'type': 'loss', 'content': 0.00030651758424937725, 'timestamp': '2025-09-30 23:09:10.002078', 'step': 5258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.063426', 'step': 5258, 'epoch': 3} {'type': 'loss', 'content': 0.0017381685320287943, 'timestamp': '2025-09-30 23:09:10.065783', 'step': 5259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:10.118944', 'step': 5259, 'epoch': 3} {'type': 'loss', 'content': 0.039196569472551346, 'timestamp': '2025-09-30 23:09:10.124805', 'step': 5260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:10.177485', 'step': 5260, 'epoch': 3} {'type': 'loss', 'content': 0.001560084754601121, 'timestamp': '2025-09-30 23:09:10.180465', 'step': 5261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:10.234473', 'step': 5261, 'epoch': 3} {'type': 'loss', 'content': 4.8694811994209886e-05, 'timestamp': '2025-09-30 23:09:10.236614', 'step': 5262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.291533', 'step': 5262, 'epoch': 3} {'type': 'loss', 'content': 0.05271342024207115, 'timestamp': '2025-09-30 23:09:10.293840', 'step': 5263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.346631', 'step': 5263, 'epoch': 3} {'type': 'loss', 'content': 0.003044451354071498, 'timestamp': '2025-09-30 23:09:10.352684', 'step': 5264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:10.409277', 'step': 5264, 'epoch': 3} {'type': 'loss', 'content': 0.0005064812139607966, 'timestamp': '2025-09-30 23:09:10.412725', 'step': 5265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:10.469462', 'step': 5265, 'epoch': 3} {'type': 'loss', 'content': 0.00014456533244810998, 'timestamp': '2025-09-30 23:09:10.471704', 'step': 5266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:10.525351', 'step': 5266, 'epoch': 3} {'type': 'loss', 'content': 0.028492730110883713, 'timestamp': '2025-09-30 23:09:10.527761', 'step': 5267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.582481', 'step': 5267, 'epoch': 3} {'type': 'loss', 'content': 4.4329357479000464e-05, 'timestamp': '2025-09-30 23:09:10.588986', 'step': 5268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.641492', 'step': 5268, 'epoch': 3} {'type': 'loss', 'content': 0.002742444397881627, 'timestamp': '2025-09-30 23:09:10.649490', 'step': 5269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.709321', 'step': 5269, 'epoch': 3} {'type': 'loss', 'content': 0.007956338115036488, 'timestamp': '2025-09-30 23:09:10.713834', 'step': 5270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.770224', 'step': 5270, 'epoch': 3} {'type': 'loss', 'content': 0.014080465771257877, 'timestamp': '2025-09-30 23:09:10.773458', 'step': 5271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.828771', 'step': 5271, 'epoch': 3} {'type': 'loss', 'content': 0.08999180793762207, 'timestamp': '2025-09-30 23:09:10.834442', 'step': 5272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.928159', 'step': 5272, 'epoch': 3} {'type': 'loss', 'content': 0.00015111277753021568, 'timestamp': '2025-09-30 23:09:10.931063', 'step': 5273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:10.987469', 'step': 5273, 'epoch': 3} {'type': 'loss', 'content': 0.0005603682948276401, 'timestamp': '2025-09-30 23:09:10.990214', 'step': 5274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.045276', 'step': 5274, 'epoch': 3} {'type': 'loss', 'content': 0.005274053197354078, 'timestamp': '2025-09-30 23:09:11.048025', 'step': 5275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.103317', 'step': 5275, 'epoch': 3} {'type': 'loss', 'content': 0.03007495030760765, 'timestamp': '2025-09-30 23:09:11.110045', 'step': 5276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:09:11.166574', 'step': 5276, 'epoch': 3} {'type': 'loss', 'content': 0.03689415380358696, 'timestamp': '2025-09-30 23:09:11.170552', 'step': 5277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.225440', 'step': 5277, 'epoch': 3} {'type': 'loss', 'content': 0.00038743691402487457, 'timestamp': '2025-09-30 23:09:11.228538', 'step': 5278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.293691', 'step': 5278, 'epoch': 3} {'type': 'loss', 'content': 0.001279396004974842, 'timestamp': '2025-09-30 23:09:11.296757', 'step': 5279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.351892', 'step': 5279, 'epoch': 3} {'type': 'loss', 'content': 0.013757949694991112, 'timestamp': '2025-09-30 23:09:11.363617', 'step': 5280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.418794', 'step': 5280, 'epoch': 3} {'type': 'loss', 'content': 0.0007490920834243298, 'timestamp': '2025-09-30 23:09:11.422034', 'step': 5281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.475840', 'step': 5281, 'epoch': 3} {'type': 'loss', 'content': 0.01314145140349865, 'timestamp': '2025-09-30 23:09:11.478783', 'step': 5282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:11.535015', 'step': 5282, 'epoch': 3} {'type': 'loss', 'content': 0.04536503180861473, 'timestamp': '2025-09-30 23:09:11.537772', 'step': 5283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.594173', 'step': 5283, 'epoch': 3} {'type': 'loss', 'content': 0.0009980871109291911, 'timestamp': '2025-09-30 23:09:11.600372', 'step': 5284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:11.654994', 'step': 5284, 'epoch': 3} {'type': 'loss', 'content': 0.0005565377068705857, 'timestamp': '2025-09-30 23:09:11.657491', 'step': 5285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.714145', 'step': 5285, 'epoch': 3} {'type': 'loss', 'content': 0.0004701591096818447, 'timestamp': '2025-09-30 23:09:11.718788', 'step': 5286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:11.782660', 'step': 5286, 'epoch': 3} {'type': 'loss', 'content': 0.047006186097860336, 'timestamp': '2025-09-30 23:09:11.787422', 'step': 5287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.846368', 'step': 5287, 'epoch': 3} {'type': 'loss', 'content': 0.0005858189542777836, 'timestamp': '2025-09-30 23:09:11.853341', 'step': 5288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.908085', 'step': 5288, 'epoch': 3} {'type': 'loss', 'content': 0.015743928030133247, 'timestamp': '2025-09-30 23:09:11.911295', 'step': 5289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:11.965780', 'step': 5289, 'epoch': 3} {'type': 'loss', 'content': 0.051435526460409164, 'timestamp': '2025-09-30 23:09:11.967985', 'step': 5290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.022572', 'step': 5290, 'epoch': 3} {'type': 'loss', 'content': 0.0011017018696293235, 'timestamp': '2025-09-30 23:09:12.025466', 'step': 5291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.079889', 'step': 5291, 'epoch': 3} {'type': 'loss', 'content': 0.03386596217751503, 'timestamp': '2025-09-30 23:09:12.086254', 'step': 5292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:12.142127', 'step': 5292, 'epoch': 3} {'type': 'loss', 'content': 0.00013619536184705794, 'timestamp': '2025-09-30 23:09:12.145415', 'step': 5293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:12.205420', 'step': 5293, 'epoch': 3} {'type': 'loss', 'content': 0.0011541730491444468, 'timestamp': '2025-09-30 23:09:12.209448', 'step': 5294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.265613', 'step': 5294, 'epoch': 3} {'type': 'loss', 'content': 0.027990881353616714, 'timestamp': '2025-09-30 23:09:12.268564', 'step': 5295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.322962', 'step': 5295, 'epoch': 3} {'type': 'loss', 'content': 0.004424522630870342, 'timestamp': '2025-09-30 23:09:12.329795', 'step': 5296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.382201', 'step': 5296, 'epoch': 3} {'type': 'loss', 'content': 0.012078171595931053, 'timestamp': '2025-09-30 23:09:12.385244', 'step': 5297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.438202', 'step': 5297, 'epoch': 3} {'type': 'loss', 'content': 0.015736190602183342, 'timestamp': '2025-09-30 23:09:12.441163', 'step': 5298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:12.494396', 'step': 5298, 'epoch': 3} {'type': 'loss', 'content': 0.008698755875229836, 'timestamp': '2025-09-30 23:09:12.496806', 'step': 5299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.550098', 'step': 5299, 'epoch': 3} {'type': 'loss', 'content': 0.01066599041223526, 'timestamp': '2025-09-30 23:09:12.555962', 'step': 5300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.608343', 'step': 5300, 'epoch': 3} {'type': 'loss', 'content': 0.0003520377504173666, 'timestamp': '2025-09-30 23:09:12.610925', 'step': 5301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.664332', 'step': 5301, 'epoch': 3} {'type': 'loss', 'content': 0.014063196256756783, 'timestamp': '2025-09-30 23:09:12.667129', 'step': 5302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.720898', 'step': 5302, 'epoch': 3} {'type': 'loss', 'content': 0.0006473431712947786, 'timestamp': '2025-09-30 23:09:12.723121', 'step': 5303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.776288', 'step': 5303, 'epoch': 3} {'type': 'loss', 'content': 0.000475444074254483, 'timestamp': '2025-09-30 23:09:12.783145', 'step': 5304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.836770', 'step': 5304, 'epoch': 3} {'type': 'loss', 'content': 0.0005903700366616249, 'timestamp': '2025-09-30 23:09:12.839261', 'step': 5305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:12.892531', 'step': 5305, 'epoch': 3} {'type': 'loss', 'content': 0.005253259092569351, 'timestamp': '2025-09-30 23:09:12.895021', 'step': 5306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:12.951636', 'step': 5306, 'epoch': 3} {'type': 'loss', 'content': 0.007154454011470079, 'timestamp': '2025-09-30 23:09:12.953884', 'step': 5307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.009807', 'step': 5307, 'epoch': 3} {'type': 'loss', 'content': 0.01180062536150217, 'timestamp': '2025-09-30 23:09:13.018791', 'step': 5308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.071711', 'step': 5308, 'epoch': 3} {'type': 'loss', 'content': 0.0020923237316310406, 'timestamp': '2025-09-30 23:09:13.074854', 'step': 5309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.128469', 'step': 5309, 'epoch': 3} {'type': 'loss', 'content': 0.01207365095615387, 'timestamp': '2025-09-30 23:09:13.131153', 'step': 5310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.185760', 'step': 5310, 'epoch': 3} {'type': 'loss', 'content': 0.001525301719084382, 'timestamp': '2025-09-30 23:09:13.188181', 'step': 5311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.242303', 'step': 5311, 'epoch': 3} {'type': 'loss', 'content': 0.0006018924759700894, 'timestamp': '2025-09-30 23:09:13.248412', 'step': 5312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.301402', 'step': 5312, 'epoch': 3} {'type': 'loss', 'content': 0.05980117246508598, 'timestamp': '2025-09-30 23:09:13.303741', 'step': 5313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.357109', 'step': 5313, 'epoch': 3} {'type': 'loss', 'content': 0.0036411152686923742, 'timestamp': '2025-09-30 23:09:13.359612', 'step': 5314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.412920', 'step': 5314, 'epoch': 3} {'type': 'loss', 'content': 0.0016178683144971728, 'timestamp': '2025-09-30 23:09:13.415027', 'step': 5315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:13.468804', 'step': 5315, 'epoch': 3} {'type': 'loss', 'content': 0.007554852403700352, 'timestamp': '2025-09-30 23:09:13.474471', 'step': 5316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.527249', 'step': 5316, 'epoch': 3} {'type': 'loss', 'content': 0.028080355376005173, 'timestamp': '2025-09-30 23:09:13.530353', 'step': 5317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:13.599993', 'step': 5317, 'epoch': 3} {'type': 'loss', 'content': 0.014153129421174526, 'timestamp': '2025-09-30 23:09:13.603065', 'step': 5318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.657116', 'step': 5318, 'epoch': 3} {'type': 'loss', 'content': 0.0002511128259357065, 'timestamp': '2025-09-30 23:09:13.659339', 'step': 5319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:13.713348', 'step': 5319, 'epoch': 3} {'type': 'loss', 'content': 0.03908878564834595, 'timestamp': '2025-09-30 23:09:13.719206', 'step': 5320, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:09:17.638911', 'step': 5320, 'epoch': 3} {'type': 'pplx', 'content': 9733620.314749457, 'timestamp': '2025-09-30 23:09:17.641148', 'step': 5320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:17.693549', 'step': 5320, 'epoch': 3} {'type': 'loss', 'content': 0.0076550147496163845, 'timestamp': '2025-09-30 23:09:17.695725', 'step': 5321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:17.751589', 'step': 5321, 'epoch': 3} {'type': 'loss', 'content': 0.001768341870047152, 'timestamp': '2025-09-30 23:09:17.753828', 'step': 5322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:17.810275', 'step': 5322, 'epoch': 3} {'type': 'loss', 'content': 0.0004906331305392087, 'timestamp': '2025-09-30 23:09:17.812733', 'step': 5323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:17.865526', 'step': 5323, 'epoch': 3} {'type': 'loss', 'content': 0.03757036477327347, 'timestamp': '2025-09-30 23:09:17.871527', 'step': 5324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:17.924021', 'step': 5324, 'epoch': 3} {'type': 'loss', 'content': 0.04333684220910072, 'timestamp': '2025-09-30 23:09:17.926089', 'step': 5325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:17.979112', 'step': 5325, 'epoch': 3} {'type': 'loss', 'content': 0.01341309305280447, 'timestamp': '2025-09-30 23:09:17.981386', 'step': 5326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:18.036170', 'step': 5326, 'epoch': 3} {'type': 'loss', 'content': 0.0015881260624155402, 'timestamp': '2025-09-30 23:09:18.038602', 'step': 5327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.104047', 'step': 5327, 'epoch': 3} {'type': 'loss', 'content': 0.08013016730546951, 'timestamp': '2025-09-30 23:09:18.111155', 'step': 5328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.167258', 'step': 5328, 'epoch': 3} {'type': 'loss', 'content': 0.0020402553491294384, 'timestamp': '2025-09-30 23:09:18.169834', 'step': 5329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.233884', 'step': 5329, 'epoch': 3} {'type': 'loss', 'content': 0.00987956952303648, 'timestamp': '2025-09-30 23:09:18.236373', 'step': 5330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:18.290012', 'step': 5330, 'epoch': 3} {'type': 'loss', 'content': 0.02037370204925537, 'timestamp': '2025-09-30 23:09:18.292340', 'step': 5331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.345310', 'step': 5331, 'epoch': 3} {'type': 'loss', 'content': 0.008254697546362877, 'timestamp': '2025-09-30 23:09:18.351598', 'step': 5332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.408664', 'step': 5332, 'epoch': 3} {'type': 'loss', 'content': 0.003429512260481715, 'timestamp': '2025-09-30 23:09:18.413986', 'step': 5333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.467101', 'step': 5333, 'epoch': 3} {'type': 'loss', 'content': 0.006348379887640476, 'timestamp': '2025-09-30 23:09:18.470053', 'step': 5334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.523245', 'step': 5334, 'epoch': 3} {'type': 'loss', 'content': 0.023979267105460167, 'timestamp': '2025-09-30 23:09:18.525537', 'step': 5335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.580901', 'step': 5335, 'epoch': 3} {'type': 'loss', 'content': 0.009603306651115417, 'timestamp': '2025-09-30 23:09:18.587361', 'step': 5336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.649473', 'step': 5336, 'epoch': 3} {'type': 'loss', 'content': 0.04590420797467232, 'timestamp': '2025-09-30 23:09:18.651729', 'step': 5337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.705174', 'step': 5337, 'epoch': 3} {'type': 'loss', 'content': 0.04952482506632805, 'timestamp': '2025-09-30 23:09:18.711098', 'step': 5338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.766062', 'step': 5338, 'epoch': 3} {'type': 'loss', 'content': 0.03307211771607399, 'timestamp': '2025-09-30 23:09:18.768417', 'step': 5339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:18.824808', 'step': 5339, 'epoch': 3} {'type': 'loss', 'content': 0.014221221208572388, 'timestamp': '2025-09-30 23:09:18.833966', 'step': 5340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.893013', 'step': 5340, 'epoch': 3} {'type': 'loss', 'content': 0.008930934593081474, 'timestamp': '2025-09-30 23:09:18.897187', 'step': 5341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:18.953111', 'step': 5341, 'epoch': 3} {'type': 'loss', 'content': 0.00511358305811882, 'timestamp': '2025-09-30 23:09:18.955353', 'step': 5342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.009827', 'step': 5342, 'epoch': 3} {'type': 'loss', 'content': 0.01917797327041626, 'timestamp': '2025-09-30 23:09:19.012593', 'step': 5343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.067042', 'step': 5343, 'epoch': 3} {'type': 'loss', 'content': 0.015102386474609375, 'timestamp': '2025-09-30 23:09:19.073238', 'step': 5344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.127170', 'step': 5344, 'epoch': 3} {'type': 'loss', 'content': 0.0057210251688957214, 'timestamp': '2025-09-30 23:09:19.130850', 'step': 5345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.187945', 'step': 5345, 'epoch': 3} {'type': 'loss', 'content': 0.053066883236169815, 'timestamp': '2025-09-30 23:09:19.192531', 'step': 5346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.250644', 'step': 5346, 'epoch': 3} {'type': 'loss', 'content': 0.0045890649780631065, 'timestamp': '2025-09-30 23:09:19.253425', 'step': 5347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:19.308170', 'step': 5347, 'epoch': 3} {'type': 'loss', 'content': 0.0032359580509364605, 'timestamp': '2025-09-30 23:09:19.314648', 'step': 5348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.368243', 'step': 5348, 'epoch': 3} {'type': 'loss', 'content': 0.02050250582396984, 'timestamp': '2025-09-30 23:09:19.371474', 'step': 5349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:19.428501', 'step': 5349, 'epoch': 3} {'type': 'loss', 'content': 0.011126445606350899, 'timestamp': '2025-09-30 23:09:19.432525', 'step': 5350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.489145', 'step': 5350, 'epoch': 3} {'type': 'loss', 'content': 0.0017347611719742417, 'timestamp': '2025-09-30 23:09:19.492482', 'step': 5351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:19.547492', 'step': 5351, 'epoch': 3} {'type': 'loss', 'content': 0.03934859484434128, 'timestamp': '2025-09-30 23:09:19.552871', 'step': 5352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:19.606771', 'step': 5352, 'epoch': 3} {'type': 'loss', 'content': 0.014060710556805134, 'timestamp': '2025-09-30 23:09:19.623638', 'step': 5353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.719586', 'step': 5353, 'epoch': 3} {'type': 'loss', 'content': 0.04264117404818535, 'timestamp': '2025-09-30 23:09:19.726492', 'step': 5354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:19.796131', 'step': 5354, 'epoch': 3} {'type': 'loss', 'content': 0.011374914087355137, 'timestamp': '2025-09-30 23:09:19.804914', 'step': 5355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:19.871495', 'step': 5355, 'epoch': 3} {'type': 'loss', 'content': 0.005035586655139923, 'timestamp': '2025-09-30 23:09:19.880982', 'step': 5356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:19.946324', 'step': 5356, 'epoch': 3} {'type': 'loss', 'content': 0.00924051832407713, 'timestamp': '2025-09-30 23:09:19.953732', 'step': 5357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.026216', 'step': 5357, 'epoch': 3} {'type': 'loss', 'content': 0.04714871570467949, 'timestamp': '2025-09-30 23:09:20.033715', 'step': 5358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.095893', 'step': 5358, 'epoch': 3} {'type': 'loss', 'content': 0.010188560001552105, 'timestamp': '2025-09-30 23:09:20.102586', 'step': 5359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:20.168836', 'step': 5359, 'epoch': 3} {'type': 'loss', 'content': 0.007605017628520727, 'timestamp': '2025-09-30 23:09:20.177784', 'step': 5360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.239183', 'step': 5360, 'epoch': 3} {'type': 'loss', 'content': 0.0512358732521534, 'timestamp': '2025-09-30 23:09:20.245431', 'step': 5361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.314486', 'step': 5361, 'epoch': 3} {'type': 'loss', 'content': 0.002503202063962817, 'timestamp': '2025-09-30 23:09:20.322549', 'step': 5362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.393896', 'step': 5362, 'epoch': 3} {'type': 'loss', 'content': 0.00601035775616765, 'timestamp': '2025-09-30 23:09:20.404618', 'step': 5363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:20.471462', 'step': 5363, 'epoch': 3} {'type': 'loss', 'content': 0.00786734651774168, 'timestamp': '2025-09-30 23:09:20.483346', 'step': 5364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.551789', 'step': 5364, 'epoch': 3} {'type': 'loss', 'content': 0.036810047924518585, 'timestamp': '2025-09-30 23:09:20.554717', 'step': 5365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.624548', 'step': 5365, 'epoch': 3} {'type': 'loss', 'content': 0.010729692876338959, 'timestamp': '2025-09-30 23:09:20.631232', 'step': 5366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.696899', 'step': 5366, 'epoch': 3} {'type': 'loss', 'content': 0.006553677376359701, 'timestamp': '2025-09-30 23:09:20.706038', 'step': 5367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.767175', 'step': 5367, 'epoch': 3} {'type': 'loss', 'content': 0.00495486706495285, 'timestamp': '2025-09-30 23:09:20.778060', 'step': 5368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:20.837717', 'step': 5368, 'epoch': 3} {'type': 'loss', 'content': 0.011092112399637699, 'timestamp': '2025-09-30 23:09:20.841146', 'step': 5369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.905184', 'step': 5369, 'epoch': 3} {'type': 'loss', 'content': 0.004594368394464254, 'timestamp': '2025-09-30 23:09:20.913131', 'step': 5370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:20.980738', 'step': 5370, 'epoch': 3} {'type': 'loss', 'content': 0.0070354449562728405, 'timestamp': '2025-09-30 23:09:20.988176', 'step': 5371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.052381', 'step': 5371, 'epoch': 3} {'type': 'loss', 'content': 0.0014542735880240798, 'timestamp': '2025-09-30 23:09:21.061983', 'step': 5372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.123543', 'step': 5372, 'epoch': 3} {'type': 'loss', 'content': 0.0019392379326745868, 'timestamp': '2025-09-30 23:09:21.130027', 'step': 5373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.192091', 'step': 5373, 'epoch': 3} {'type': 'loss', 'content': 0.009179824031889439, 'timestamp': '2025-09-30 23:09:21.197973', 'step': 5374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.260368', 'step': 5374, 'epoch': 3} {'type': 'loss', 'content': 0.038390614092350006, 'timestamp': '2025-09-30 23:09:21.266584', 'step': 5375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:21.333955', 'step': 5375, 'epoch': 3} {'type': 'loss', 'content': 0.020210588350892067, 'timestamp': '2025-09-30 23:09:21.341460', 'step': 5376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:21.405410', 'step': 5376, 'epoch': 3} {'type': 'loss', 'content': 0.010226892307400703, 'timestamp': '2025-09-30 23:09:21.412521', 'step': 5377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.470550', 'step': 5377, 'epoch': 3} {'type': 'loss', 'content': 0.018290260806679726, 'timestamp': '2025-09-30 23:09:21.478024', 'step': 5378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.550103', 'step': 5378, 'epoch': 3} {'type': 'loss', 'content': 0.0045138136483728886, 'timestamp': '2025-09-30 23:09:21.557744', 'step': 5379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.625324', 'step': 5379, 'epoch': 3} {'type': 'loss', 'content': 0.0053124879486858845, 'timestamp': '2025-09-30 23:09:21.632825', 'step': 5380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.696407', 'step': 5380, 'epoch': 3} {'type': 'loss', 'content': 0.003423611167818308, 'timestamp': '2025-09-30 23:09:21.705354', 'step': 5381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:21.775391', 'step': 5381, 'epoch': 3} {'type': 'loss', 'content': 0.006459915544837713, 'timestamp': '2025-09-30 23:09:21.779682', 'step': 5382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:21.836695', 'step': 5382, 'epoch': 3} {'type': 'loss', 'content': 0.001956627005711198, 'timestamp': '2025-09-30 23:09:21.840654', 'step': 5383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:21.912886', 'step': 5383, 'epoch': 3} {'type': 'loss', 'content': 0.008338394574820995, 'timestamp': '2025-09-30 23:09:21.926456', 'step': 5384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:21.998568', 'step': 5384, 'epoch': 3} {'type': 'loss', 'content': 0.013260873965919018, 'timestamp': '2025-09-30 23:09:22.005349', 'step': 5385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:22.074144', 'step': 5385, 'epoch': 3} {'type': 'loss', 'content': 0.04093055799603462, 'timestamp': '2025-09-30 23:09:22.080858', 'step': 5386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.143273', 'step': 5386, 'epoch': 3} {'type': 'loss', 'content': 0.003901091171428561, 'timestamp': '2025-09-30 23:09:22.153505', 'step': 5387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.231999', 'step': 5387, 'epoch': 3} {'type': 'loss', 'content': 0.015721866860985756, 'timestamp': '2025-09-30 23:09:22.240566', 'step': 5388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.321482', 'step': 5388, 'epoch': 3} {'type': 'loss', 'content': 0.004192509222775698, 'timestamp': '2025-09-30 23:09:22.331713', 'step': 5389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.405727', 'step': 5389, 'epoch': 3} {'type': 'loss', 'content': 0.01967029459774494, 'timestamp': '2025-09-30 23:09:22.408381', 'step': 5390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.476517', 'step': 5390, 'epoch': 3} {'type': 'loss', 'content': 0.0016288388287648559, 'timestamp': '2025-09-30 23:09:22.482180', 'step': 5391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.545257', 'step': 5391, 'epoch': 3} {'type': 'loss', 'content': 0.0043714093044400215, 'timestamp': '2025-09-30 23:09:22.553864', 'step': 5392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:22.607001', 'step': 5392, 'epoch': 3} {'type': 'loss', 'content': 0.0009817540412768722, 'timestamp': '2025-09-30 23:09:22.611603', 'step': 5393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.667676', 'step': 5393, 'epoch': 3} {'type': 'loss', 'content': 0.005321721080690622, 'timestamp': '2025-09-30 23:09:22.669967', 'step': 5394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.722288', 'step': 5394, 'epoch': 3} {'type': 'loss', 'content': 0.01711183786392212, 'timestamp': '2025-09-30 23:09:22.724857', 'step': 5395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.780079', 'step': 5395, 'epoch': 3} {'type': 'loss', 'content': 0.0013473185244947672, 'timestamp': '2025-09-30 23:09:22.786464', 'step': 5396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.838810', 'step': 5396, 'epoch': 3} {'type': 'loss', 'content': 0.019447119906544685, 'timestamp': '2025-09-30 23:09:22.841599', 'step': 5397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.894510', 'step': 5397, 'epoch': 3} {'type': 'loss', 'content': 0.0062785823829472065, 'timestamp': '2025-09-30 23:09:22.897160', 'step': 5398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:22.964779', 'step': 5398, 'epoch': 3} {'type': 'loss', 'content': 0.012643150053918362, 'timestamp': '2025-09-30 23:09:22.976708', 'step': 5399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.049873', 'step': 5399, 'epoch': 3} {'type': 'loss', 'content': 0.005509054753929377, 'timestamp': '2025-09-30 23:09:23.064331', 'step': 5400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.126588', 'step': 5400, 'epoch': 3} {'type': 'loss', 'content': 0.030414823442697525, 'timestamp': '2025-09-30 23:09:23.130764', 'step': 5401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:23.200507', 'step': 5401, 'epoch': 3} {'type': 'loss', 'content': 0.006722076330333948, 'timestamp': '2025-09-30 23:09:23.206879', 'step': 5402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:23.272222', 'step': 5402, 'epoch': 3} {'type': 'loss', 'content': 0.006683838553726673, 'timestamp': '2025-09-30 23:09:23.279754', 'step': 5403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.342032', 'step': 5403, 'epoch': 3} {'type': 'loss', 'content': 0.011533228680491447, 'timestamp': '2025-09-30 23:09:23.353151', 'step': 5404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:23.423041', 'step': 5404, 'epoch': 3} {'type': 'loss', 'content': 0.001321260118857026, 'timestamp': '2025-09-30 23:09:23.425736', 'step': 5405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.479544', 'step': 5405, 'epoch': 3} {'type': 'loss', 'content': 0.0013223737478256226, 'timestamp': '2025-09-30 23:09:23.483931', 'step': 5406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.542086', 'step': 5406, 'epoch': 3} {'type': 'loss', 'content': 0.007727981545031071, 'timestamp': '2025-09-30 23:09:23.545822', 'step': 5407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.598575', 'step': 5407, 'epoch': 3} {'type': 'loss', 'content': 0.0007249112240970135, 'timestamp': '2025-09-30 23:09:23.604750', 'step': 5408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.659317', 'step': 5408, 'epoch': 3} {'type': 'loss', 'content': 0.00014886110147926956, 'timestamp': '2025-09-30 23:09:23.661822', 'step': 5409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.721974', 'step': 5409, 'epoch': 3} {'type': 'loss', 'content': 0.004364133346825838, 'timestamp': '2025-09-30 23:09:23.725826', 'step': 5410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.786192', 'step': 5410, 'epoch': 3} {'type': 'loss', 'content': 0.0030787389259785414, 'timestamp': '2025-09-30 23:09:23.791977', 'step': 5411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.849340', 'step': 5411, 'epoch': 3} {'type': 'loss', 'content': 0.0008832313469611108, 'timestamp': '2025-09-30 23:09:23.861318', 'step': 5412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:23.930304', 'step': 5412, 'epoch': 3} {'type': 'loss', 'content': 0.0008041294640861452, 'timestamp': '2025-09-30 23:09:23.938446', 'step': 5413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:24.010986', 'step': 5413, 'epoch': 3} {'type': 'loss', 'content': 0.03617030754685402, 'timestamp': '2025-09-30 23:09:24.014490', 'step': 5414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.073434', 'step': 5414, 'epoch': 3} {'type': 'loss', 'content': 0.0022280763369053602, 'timestamp': '2025-09-30 23:09:24.081226', 'step': 5415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.156807', 'step': 5415, 'epoch': 3} {'type': 'loss', 'content': 0.00038455385947600007, 'timestamp': '2025-09-30 23:09:24.168873', 'step': 5416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.224996', 'step': 5416, 'epoch': 3} {'type': 'loss', 'content': 0.004471946973353624, 'timestamp': '2025-09-30 23:09:24.235158', 'step': 5417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.307686', 'step': 5417, 'epoch': 3} {'type': 'loss', 'content': 0.001790498266927898, 'timestamp': '2025-09-30 23:09:24.317295', 'step': 5418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.374657', 'step': 5418, 'epoch': 3} {'type': 'loss', 'content': 0.0013165902346372604, 'timestamp': '2025-09-30 23:09:24.388039', 'step': 5419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.462062', 'step': 5419, 'epoch': 3} {'type': 'loss', 'content': 0.00015124141646083444, 'timestamp': '2025-09-30 23:09:24.475150', 'step': 5420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:24.549475', 'step': 5420, 'epoch': 3} {'type': 'loss', 'content': 0.010753738693892956, 'timestamp': '2025-09-30 23:09:24.559159', 'step': 5421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.633363', 'step': 5421, 'epoch': 3} {'type': 'loss', 'content': 0.013182363472878933, 'timestamp': '2025-09-30 23:09:24.651520', 'step': 5422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.731083', 'step': 5422, 'epoch': 3} {'type': 'loss', 'content': 0.0004045519162900746, 'timestamp': '2025-09-30 23:09:24.743478', 'step': 5423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.825110', 'step': 5423, 'epoch': 3} {'type': 'loss', 'content': 0.022795457392930984, 'timestamp': '2025-09-30 23:09:24.833243', 'step': 5424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:24.924207', 'step': 5424, 'epoch': 3} {'type': 'loss', 'content': 0.0005146376206539571, 'timestamp': '2025-09-30 23:09:24.936742', 'step': 5425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:25.027171', 'step': 5425, 'epoch': 3} {'type': 'loss', 'content': 0.006732369773089886, 'timestamp': '2025-09-30 23:09:25.031069', 'step': 5426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:25.124467', 'step': 5426, 'epoch': 3} {'type': 'loss', 'content': 0.01118514221161604, 'timestamp': '2025-09-30 23:09:25.137450', 'step': 5427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:25.223239', 'step': 5427, 'epoch': 3} {'type': 'loss', 'content': 0.0025245456490665674, 'timestamp': '2025-09-30 23:09:25.240912', 'step': 5428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:25.317311', 'step': 5428, 'epoch': 3} {'type': 'loss', 'content': 0.0002227245131507516, 'timestamp': '2025-09-30 23:09:25.333366', 'step': 5429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:25.433119', 'step': 5429, 'epoch': 3} {'type': 'loss', 'content': 0.0031108383554965258, 'timestamp': '2025-09-30 23:09:25.446646', 'step': 5430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:25.531193', 'step': 5430, 'epoch': 3} {'type': 'loss', 'content': 0.00040235641063190997, 'timestamp': '2025-09-30 23:09:25.545816', 'step': 5431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:25.627593', 'step': 5431, 'epoch': 3} {'type': 'loss', 'content': 0.00014701473992317915, 'timestamp': '2025-09-30 23:09:25.637579', 'step': 5432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:25.728596', 'step': 5432, 'epoch': 3} {'type': 'loss', 'content': 0.0029251801315695047, 'timestamp': '2025-09-30 23:09:25.733706', 'step': 5433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:25.823817', 'step': 5433, 'epoch': 3} {'type': 'loss', 'content': 0.027303893119096756, 'timestamp': '2025-09-30 23:09:25.836976', 'step': 5434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:25.924321', 'step': 5434, 'epoch': 3} {'type': 'loss', 'content': 0.05804658681154251, 'timestamp': '2025-09-30 23:09:25.935542', 'step': 5435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.025257', 'step': 5435, 'epoch': 3} {'type': 'loss', 'content': 0.005748578812927008, 'timestamp': '2025-09-30 23:09:26.040781', 'step': 5436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:26.117947', 'step': 5436, 'epoch': 3} {'type': 'loss', 'content': 0.0004014233418274671, 'timestamp': '2025-09-30 23:09:26.130508', 'step': 5437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:26.214249', 'step': 5437, 'epoch': 3} {'type': 'loss', 'content': 0.02601737156510353, 'timestamp': '2025-09-30 23:09:26.218149', 'step': 5438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.300892', 'step': 5438, 'epoch': 3} {'type': 'loss', 'content': 0.0016031678533181548, 'timestamp': '2025-09-30 23:09:26.314447', 'step': 5439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.396525', 'step': 5439, 'epoch': 3} {'type': 'loss', 'content': 0.0009900204604491591, 'timestamp': '2025-09-30 23:09:26.407266', 'step': 5440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:26.475879', 'step': 5440, 'epoch': 3} {'type': 'loss', 'content': 0.00878051482141018, 'timestamp': '2025-09-30 23:09:26.483018', 'step': 5441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.552710', 'step': 5441, 'epoch': 3} {'type': 'loss', 'content': 0.04881301149725914, 'timestamp': '2025-09-30 23:09:26.558716', 'step': 5442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.628037', 'step': 5442, 'epoch': 3} {'type': 'loss', 'content': 0.0017477964283898473, 'timestamp': '2025-09-30 23:09:26.633483', 'step': 5443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.700404', 'step': 5443, 'epoch': 3} {'type': 'loss', 'content': 0.0019304677844047546, 'timestamp': '2025-09-30 23:09:26.708253', 'step': 5444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.777311', 'step': 5444, 'epoch': 3} {'type': 'loss', 'content': 0.0008868641452863812, 'timestamp': '2025-09-30 23:09:26.780417', 'step': 5445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.851313', 'step': 5445, 'epoch': 3} {'type': 'loss', 'content': 0.00014177760749589652, 'timestamp': '2025-09-30 23:09:26.863729', 'step': 5446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:26.926359', 'step': 5446, 'epoch': 3} {'type': 'loss', 'content': 0.00025789771461859345, 'timestamp': '2025-09-30 23:09:26.930703', 'step': 5447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.000143', 'step': 5447, 'epoch': 3} {'type': 'loss', 'content': 0.01734933629631996, 'timestamp': '2025-09-30 23:09:27.007470', 'step': 5448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.081125', 'step': 5448, 'epoch': 3} {'type': 'loss', 'content': 0.0021619549952447414, 'timestamp': '2025-09-30 23:09:27.091598', 'step': 5449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.168041', 'step': 5449, 'epoch': 3} {'type': 'loss', 'content': 0.00039158089202828705, 'timestamp': '2025-09-30 23:09:27.172040', 'step': 5450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.231474', 'step': 5450, 'epoch': 3} {'type': 'loss', 'content': 6.703741382807493e-05, 'timestamp': '2025-09-30 23:09:27.235182', 'step': 5451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.322247', 'step': 5451, 'epoch': 3} {'type': 'loss', 'content': 0.004693084396421909, 'timestamp': '2025-09-30 23:09:27.334845', 'step': 5452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:27.422924', 'step': 5452, 'epoch': 3} {'type': 'loss', 'content': 0.0012994010467082262, 'timestamp': '2025-09-30 23:09:27.432799', 'step': 5453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.507447', 'step': 5453, 'epoch': 3} {'type': 'loss', 'content': 0.01946217007935047, 'timestamp': '2025-09-30 23:09:27.522479', 'step': 5454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.597285', 'step': 5454, 'epoch': 3} {'type': 'loss', 'content': 0.0009833329822868109, 'timestamp': '2025-09-30 23:09:27.600305', 'step': 5455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.663550', 'step': 5455, 'epoch': 3} {'type': 'loss', 'content': 0.000572807271964848, 'timestamp': '2025-09-30 23:09:27.669937', 'step': 5456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:27.730680', 'step': 5456, 'epoch': 3} {'type': 'loss', 'content': 0.0007083440432325006, 'timestamp': '2025-09-30 23:09:27.738076', 'step': 5457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.797368', 'step': 5457, 'epoch': 3} {'type': 'loss', 'content': 0.00029297146829776466, 'timestamp': '2025-09-30 23:09:27.800173', 'step': 5458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.860555', 'step': 5458, 'epoch': 3} {'type': 'loss', 'content': 0.02055474743247032, 'timestamp': '2025-09-30 23:09:27.863186', 'step': 5459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:27.931543', 'step': 5459, 'epoch': 3} {'type': 'loss', 'content': 0.011115794070065022, 'timestamp': '2025-09-30 23:09:27.943825', 'step': 5460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:09:28.021495', 'step': 5460, 'epoch': 3} {'type': 'loss', 'content': 0.0010016693267971277, 'timestamp': '2025-09-30 23:09:28.029540', 'step': 5461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.094822', 'step': 5461, 'epoch': 3} {'type': 'loss', 'content': 0.005365528166294098, 'timestamp': '2025-09-30 23:09:28.101848', 'step': 5462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.168537', 'step': 5462, 'epoch': 3} {'type': 'loss', 'content': 0.0010342253372073174, 'timestamp': '2025-09-30 23:09:28.171134', 'step': 5463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.236101', 'step': 5463, 'epoch': 3} {'type': 'loss', 'content': 0.011748586781322956, 'timestamp': '2025-09-30 23:09:28.245598', 'step': 5464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.303084', 'step': 5464, 'epoch': 3} {'type': 'loss', 'content': 0.03174746409058571, 'timestamp': '2025-09-30 23:09:28.307877', 'step': 5465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.366554', 'step': 5465, 'epoch': 3} {'type': 'loss', 'content': 0.03449972718954086, 'timestamp': '2025-09-30 23:09:28.370175', 'step': 5466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.428903', 'step': 5466, 'epoch': 3} {'type': 'loss', 'content': 0.013487524352967739, 'timestamp': '2025-09-30 23:09:28.437184', 'step': 5467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:28.498317', 'step': 5467, 'epoch': 3} {'type': 'loss', 'content': 0.00027657506871037185, 'timestamp': '2025-09-30 23:09:28.505230', 'step': 5468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.576234', 'step': 5468, 'epoch': 3} {'type': 'loss', 'content': 0.011446461081504822, 'timestamp': '2025-09-30 23:09:28.585785', 'step': 5469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:28.649043', 'step': 5469, 'epoch': 3} {'type': 'loss', 'content': 9.218641935149208e-05, 'timestamp': '2025-09-30 23:09:28.655331', 'step': 5470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.719117', 'step': 5470, 'epoch': 3} {'type': 'loss', 'content': 0.00015280256047844887, 'timestamp': '2025-09-30 23:09:28.724915', 'step': 5471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:28.793957', 'step': 5471, 'epoch': 3} {'type': 'loss', 'content': 0.02576504833996296, 'timestamp': '2025-09-30 23:09:28.805007', 'step': 5472, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:09:34.529383', 'step': 5472, 'epoch': 3} {'type': 'pplx', 'content': 8572571.361247847, 'timestamp': '2025-09-30 23:09:34.539862', 'step': 5472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:34.611036', 'step': 5472, 'epoch': 3} {'type': 'loss', 'content': 0.05815792828798294, 'timestamp': '2025-09-30 23:09:34.621482', 'step': 5473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:34.707893', 'step': 5473, 'epoch': 3} {'type': 'loss', 'content': 0.006323071662336588, 'timestamp': '2025-09-30 23:09:34.717147', 'step': 5474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:34.794797', 'step': 5474, 'epoch': 3} {'type': 'loss', 'content': 0.00040910180541686714, 'timestamp': '2025-09-30 23:09:34.806471', 'step': 5475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:34.878462', 'step': 5475, 'epoch': 3} {'type': 'loss', 'content': 0.00016846682410687208, 'timestamp': '2025-09-30 23:09:34.895277', 'step': 5476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:34.970065', 'step': 5476, 'epoch': 3} {'type': 'loss', 'content': 0.009840218350291252, 'timestamp': '2025-09-30 23:09:34.972350', 'step': 5477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:35.045963', 'step': 5477, 'epoch': 3} {'type': 'loss', 'content': 0.0037580374628305435, 'timestamp': '2025-09-30 23:09:35.055993', 'step': 5478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.140604', 'step': 5478, 'epoch': 3} {'type': 'loss', 'content': 4.935025572194718e-05, 'timestamp': '2025-09-30 23:09:35.150756', 'step': 5479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.235152', 'step': 5479, 'epoch': 3} {'type': 'loss', 'content': 0.006420737598091364, 'timestamp': '2025-09-30 23:09:35.247845', 'step': 5480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.336289', 'step': 5480, 'epoch': 3} {'type': 'loss', 'content': 0.020417917519807816, 'timestamp': '2025-09-30 23:09:35.341705', 'step': 5481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:35.430827', 'step': 5481, 'epoch': 3} {'type': 'loss', 'content': 0.007604779209941626, 'timestamp': '2025-09-30 23:09:35.442595', 'step': 5482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.523279', 'step': 5482, 'epoch': 3} {'type': 'loss', 'content': 0.0007609566091559827, 'timestamp': '2025-09-30 23:09:35.534322', 'step': 5483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.617225', 'step': 5483, 'epoch': 3} {'type': 'loss', 'content': 0.00019569277355913073, 'timestamp': '2025-09-30 23:09:35.625896', 'step': 5484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.699550', 'step': 5484, 'epoch': 3} {'type': 'loss', 'content': 0.019593371078372, 'timestamp': '2025-09-30 23:09:35.708729', 'step': 5485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.788617', 'step': 5485, 'epoch': 3} {'type': 'loss', 'content': 0.0023829929996281862, 'timestamp': '2025-09-30 23:09:35.798803', 'step': 5486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.875498', 'step': 5486, 'epoch': 3} {'type': 'loss', 'content': 0.0017279606545343995, 'timestamp': '2025-09-30 23:09:35.880367', 'step': 5487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:35.957783', 'step': 5487, 'epoch': 3} {'type': 'loss', 'content': 0.00012907394557259977, 'timestamp': '2025-09-30 23:09:35.971070', 'step': 5488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.041757', 'step': 5488, 'epoch': 3} {'type': 'loss', 'content': 0.015783419832587242, 'timestamp': '2025-09-30 23:09:36.053535', 'step': 5489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.138133', 'step': 5489, 'epoch': 3} {'type': 'loss', 'content': 0.0033578730653971434, 'timestamp': '2025-09-30 23:09:36.147831', 'step': 5490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.219090', 'step': 5490, 'epoch': 3} {'type': 'loss', 'content': 0.005302288103848696, 'timestamp': '2025-09-30 23:09:36.227032', 'step': 5491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.302431', 'step': 5491, 'epoch': 3} {'type': 'loss', 'content': 0.0005284766084514558, 'timestamp': '2025-09-30 23:09:36.313536', 'step': 5492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:36.387060', 'step': 5492, 'epoch': 3} {'type': 'loss', 'content': 0.00024573400150984526, 'timestamp': '2025-09-30 23:09:36.394275', 'step': 5493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:36.467589', 'step': 5493, 'epoch': 3} {'type': 'loss', 'content': 0.005278735887259245, 'timestamp': '2025-09-30 23:09:36.472675', 'step': 5494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:36.556778', 'step': 5494, 'epoch': 3} {'type': 'loss', 'content': 0.04024958983063698, 'timestamp': '2025-09-30 23:09:36.566397', 'step': 5495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.648289', 'step': 5495, 'epoch': 3} {'type': 'loss', 'content': 0.03824962303042412, 'timestamp': '2025-09-30 23:09:36.661963', 'step': 5496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.761483', 'step': 5496, 'epoch': 3} {'type': 'loss', 'content': 0.0036676875315606594, 'timestamp': '2025-09-30 23:09:36.775150', 'step': 5497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.853836', 'step': 5497, 'epoch': 3} {'type': 'loss', 'content': 0.0015695448964834213, 'timestamp': '2025-09-30 23:09:36.870335', 'step': 5498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:36.961641', 'step': 5498, 'epoch': 3} {'type': 'loss', 'content': 0.00038389768451452255, 'timestamp': '2025-09-30 23:09:36.977987', 'step': 5499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:37.072941', 'step': 5499, 'epoch': 3} {'type': 'loss', 'content': 0.0002330776915187016, 'timestamp': '2025-09-30 23:09:37.079528', 'step': 5500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-30 23:09:37.618861', 'step': 5500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:37.695004', 'step': 5500, 'epoch': 3} {'type': 'loss', 'content': 0.0006275003543123603, 'timestamp': '2025-09-30 23:09:37.715187', 'step': 5501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:37.787613', 'step': 5501, 'epoch': 3} {'type': 'loss', 'content': 0.0010975460754707456, 'timestamp': '2025-09-30 23:09:37.807100', 'step': 5502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:37.889020', 'step': 5502, 'epoch': 3} {'type': 'loss', 'content': 0.0017312531126663089, 'timestamp': '2025-09-30 23:09:37.908748', 'step': 5503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:37.984381', 'step': 5503, 'epoch': 3} {'type': 'loss', 'content': 0.02498714253306389, 'timestamp': '2025-09-30 23:09:38.004343', 'step': 5504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.073269', 'step': 5504, 'epoch': 3} {'type': 'loss', 'content': 0.00016679626423865557, 'timestamp': '2025-09-30 23:09:38.076562', 'step': 5505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.148814', 'step': 5505, 'epoch': 3} {'type': 'loss', 'content': 0.0019747575279325247, 'timestamp': '2025-09-30 23:09:38.153047', 'step': 5506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.211427', 'step': 5506, 'epoch': 3} {'type': 'loss', 'content': 0.017321862280368805, 'timestamp': '2025-09-30 23:09:38.221954', 'step': 5507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.280067', 'step': 5507, 'epoch': 3} {'type': 'loss', 'content': 0.007994195446372032, 'timestamp': '2025-09-30 23:09:38.292832', 'step': 5508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.363056', 'step': 5508, 'epoch': 3} {'type': 'loss', 'content': 0.0003813300281763077, 'timestamp': '2025-09-30 23:09:38.372463', 'step': 5509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.443553', 'step': 5509, 'epoch': 3} {'type': 'loss', 'content': 0.009951763786375523, 'timestamp': '2025-09-30 23:09:38.446872', 'step': 5510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:38.510898', 'step': 5510, 'epoch': 3} {'type': 'loss', 'content': 0.0010107032721862197, 'timestamp': '2025-09-30 23:09:38.517724', 'step': 5511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.583104', 'step': 5511, 'epoch': 3} {'type': 'loss', 'content': 0.0010057325707748532, 'timestamp': '2025-09-30 23:09:38.600353', 'step': 5512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.673047', 'step': 5512, 'epoch': 3} {'type': 'loss', 'content': 0.00023145588056650013, 'timestamp': '2025-09-30 23:09:38.688240', 'step': 5513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.773086', 'step': 5513, 'epoch': 3} {'type': 'loss', 'content': 0.013617872260510921, 'timestamp': '2025-09-30 23:09:38.778184', 'step': 5514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.846459', 'step': 5514, 'epoch': 3} {'type': 'loss', 'content': 0.0030271424911916256, 'timestamp': '2025-09-30 23:09:38.850679', 'step': 5515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:38.907496', 'step': 5515, 'epoch': 3} {'type': 'loss', 'content': 0.0002362019440624863, 'timestamp': '2025-09-30 23:09:38.922338', 'step': 5516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:39.007820', 'step': 5516, 'epoch': 3} {'type': 'loss', 'content': 0.02170890010893345, 'timestamp': '2025-09-30 23:09:39.021877', 'step': 5517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:39.099950', 'step': 5517, 'epoch': 3} {'type': 'loss', 'content': 0.00601471122354269, 'timestamp': '2025-09-30 23:09:39.112293', 'step': 5518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:39.190696', 'step': 5518, 'epoch': 3} {'type': 'loss', 'content': 0.028549794107675552, 'timestamp': '2025-09-30 23:09:39.203889', 'step': 5519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:39.288581', 'step': 5519, 'epoch': 3} {'type': 'loss', 'content': 0.004078337457031012, 'timestamp': '2025-09-30 23:09:39.307725', 'step': 5520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:39.404057', 'step': 5520, 'epoch': 3} {'type': 'loss', 'content': 0.002331932308152318, 'timestamp': '2025-09-30 23:09:39.418389', 'step': 5521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:39.515439', 'step': 5521, 'epoch': 3} {'type': 'loss', 'content': 0.0033930905628949404, 'timestamp': '2025-09-30 23:09:39.532780', 'step': 5522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:39.624613', 'step': 5522, 'epoch': 3} {'type': 'loss', 'content': 0.007623393554240465, 'timestamp': '2025-09-30 23:09:39.642364', 'step': 5523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:39.748537', 'step': 5523, 'epoch': 3} {'type': 'loss', 'content': 0.00021464504243340343, 'timestamp': '2025-09-30 23:09:39.771211', 'step': 5524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:39.857568', 'step': 5524, 'epoch': 3} {'type': 'loss', 'content': 0.01744997687637806, 'timestamp': '2025-09-30 23:09:39.871276', 'step': 5525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:39.946179', 'step': 5525, 'epoch': 3} {'type': 'loss', 'content': 0.0036422070115804672, 'timestamp': '2025-09-30 23:09:39.953372', 'step': 5526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.036290', 'step': 5526, 'epoch': 3} {'type': 'loss', 'content': 0.0025871696416288614, 'timestamp': '2025-09-30 23:09:40.050080', 'step': 5527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.126667', 'step': 5527, 'epoch': 3} {'type': 'loss', 'content': 0.0009985511424019933, 'timestamp': '2025-09-30 23:09:40.143016', 'step': 5528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.226814', 'step': 5528, 'epoch': 3} {'type': 'loss', 'content': 0.016591912135481834, 'timestamp': '2025-09-30 23:09:40.239146', 'step': 5529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:40.325366', 'step': 5529, 'epoch': 3} {'type': 'loss', 'content': 0.001013635890558362, 'timestamp': '2025-09-30 23:09:40.331475', 'step': 5530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.392270', 'step': 5530, 'epoch': 3} {'type': 'loss', 'content': 0.01627892442047596, 'timestamp': '2025-09-30 23:09:40.404879', 'step': 5531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.486544', 'step': 5531, 'epoch': 3} {'type': 'loss', 'content': 0.005317458417266607, 'timestamp': '2025-09-30 23:09:40.504674', 'step': 5532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.595408', 'step': 5532, 'epoch': 3} {'type': 'loss', 'content': 0.004740710835903883, 'timestamp': '2025-09-30 23:09:40.600966', 'step': 5533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.690606', 'step': 5533, 'epoch': 3} {'type': 'loss', 'content': 0.010777981020510197, 'timestamp': '2025-09-30 23:09:40.704529', 'step': 5534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.802666', 'step': 5534, 'epoch': 3} {'type': 'loss', 'content': 0.007578778546303511, 'timestamp': '2025-09-30 23:09:40.814917', 'step': 5535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.881538', 'step': 5535, 'epoch': 3} {'type': 'loss', 'content': 0.0014549950137734413, 'timestamp': '2025-09-30 23:09:40.888285', 'step': 5536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:40.978741', 'step': 5536, 'epoch': 3} {'type': 'loss', 'content': 0.0001395871368004009, 'timestamp': '2025-09-30 23:09:40.982651', 'step': 5537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:41.064694', 'step': 5537, 'epoch': 3} {'type': 'loss', 'content': 0.04486862197518349, 'timestamp': '2025-09-30 23:09:41.077060', 'step': 5538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.164471', 'step': 5538, 'epoch': 3} {'type': 'loss', 'content': 0.040409017354249954, 'timestamp': '2025-09-30 23:09:41.178420', 'step': 5539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:41.264908', 'step': 5539, 'epoch': 3} {'type': 'loss', 'content': 0.0008408591384068131, 'timestamp': '2025-09-30 23:09:41.281560', 'step': 5540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.365665', 'step': 5540, 'epoch': 3} {'type': 'loss', 'content': 0.018486173823475838, 'timestamp': '2025-09-30 23:09:41.378898', 'step': 5541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.471771', 'step': 5541, 'epoch': 3} {'type': 'loss', 'content': 0.000217291948501952, 'timestamp': '2025-09-30 23:09:41.477459', 'step': 5542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.559936', 'step': 5542, 'epoch': 3} {'type': 'loss', 'content': 0.0006515471031889319, 'timestamp': '2025-09-30 23:09:41.575971', 'step': 5543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.658346', 'step': 5543, 'epoch': 3} {'type': 'loss', 'content': 0.0016795714618638158, 'timestamp': '2025-09-30 23:09:41.676640', 'step': 5544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.763887', 'step': 5544, 'epoch': 3} {'type': 'loss', 'content': 0.00738621037453413, 'timestamp': '2025-09-30 23:09:41.782518', 'step': 5545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.870561', 'step': 5545, 'epoch': 3} {'type': 'loss', 'content': 0.008085842244327068, 'timestamp': '2025-09-30 23:09:41.887237', 'step': 5546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:41.978331', 'step': 5546, 'epoch': 3} {'type': 'loss', 'content': 0.008205492980778217, 'timestamp': '2025-09-30 23:09:41.982120', 'step': 5547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:42.063203', 'step': 5547, 'epoch': 3} {'type': 'loss', 'content': 0.0008497735834680498, 'timestamp': '2025-09-30 23:09:42.082861', 'step': 5548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.172675', 'step': 5548, 'epoch': 3} {'type': 'loss', 'content': 0.0063981288112699986, 'timestamp': '2025-09-30 23:09:42.177739', 'step': 5549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.259134', 'step': 5549, 'epoch': 3} {'type': 'loss', 'content': 0.004354391247034073, 'timestamp': '2025-09-30 23:09:42.278460', 'step': 5550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.367684', 'step': 5550, 'epoch': 3} {'type': 'loss', 'content': 0.015083110891282558, 'timestamp': '2025-09-30 23:09:42.382601', 'step': 5551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.453609', 'step': 5551, 'epoch': 3} {'type': 'loss', 'content': 0.005739141721278429, 'timestamp': '2025-09-30 23:09:42.473313', 'step': 5552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.538608', 'step': 5552, 'epoch': 3} {'type': 'loss', 'content': 0.001210228307172656, 'timestamp': '2025-09-30 23:09:42.555303', 'step': 5553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.647300', 'step': 5553, 'epoch': 3} {'type': 'loss', 'content': 0.012325928546488285, 'timestamp': '2025-09-30 23:09:42.656428', 'step': 5554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.753274', 'step': 5554, 'epoch': 3} {'type': 'loss', 'content': 0.000394166650949046, 'timestamp': '2025-09-30 23:09:42.766731', 'step': 5555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.857178', 'step': 5555, 'epoch': 3} {'type': 'loss', 'content': 0.0050447918474674225, 'timestamp': '2025-09-30 23:09:42.875798', 'step': 5556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:42.937002', 'step': 5556, 'epoch': 3} {'type': 'loss', 'content': 0.002191136358305812, 'timestamp': '2025-09-30 23:09:42.947496', 'step': 5557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.026500', 'step': 5557, 'epoch': 3} {'type': 'loss', 'content': 0.01303886529058218, 'timestamp': '2025-09-30 23:09:43.032047', 'step': 5558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.100827', 'step': 5558, 'epoch': 3} {'type': 'loss', 'content': 0.0032597023528069258, 'timestamp': '2025-09-30 23:09:43.110090', 'step': 5559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.189071', 'step': 5559, 'epoch': 3} {'type': 'loss', 'content': 0.0017571104690432549, 'timestamp': '2025-09-30 23:09:43.196510', 'step': 5560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.286632', 'step': 5560, 'epoch': 3} {'type': 'loss', 'content': 0.002579588210210204, 'timestamp': '2025-09-30 23:09:43.301069', 'step': 5561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.394835', 'step': 5561, 'epoch': 3} {'type': 'loss', 'content': 0.011145849712193012, 'timestamp': '2025-09-30 23:09:43.413021', 'step': 5562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.488022', 'step': 5562, 'epoch': 3} {'type': 'loss', 'content': 0.0006743413396179676, 'timestamp': '2025-09-30 23:09:43.500223', 'step': 5563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.585450', 'step': 5563, 'epoch': 3} {'type': 'loss', 'content': 0.04123864322900772, 'timestamp': '2025-09-30 23:09:43.592366', 'step': 5564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.675254', 'step': 5564, 'epoch': 3} {'type': 'loss', 'content': 0.0036577850114554167, 'timestamp': '2025-09-30 23:09:43.679567', 'step': 5565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:43.758897', 'step': 5565, 'epoch': 3} {'type': 'loss', 'content': 0.00019608056754805148, 'timestamp': '2025-09-30 23:09:43.763219', 'step': 5566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.839711', 'step': 5566, 'epoch': 3} {'type': 'loss', 'content': 0.0027913968078792095, 'timestamp': '2025-09-30 23:09:43.853114', 'step': 5567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:43.929614', 'step': 5567, 'epoch': 3} {'type': 'loss', 'content': 0.0009818432154133916, 'timestamp': '2025-09-30 23:09:43.937370', 'step': 5568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.009957', 'step': 5568, 'epoch': 3} {'type': 'loss', 'content': 0.016927678138017654, 'timestamp': '2025-09-30 23:09:44.013830', 'step': 5569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.077501', 'step': 5569, 'epoch': 3} {'type': 'loss', 'content': 0.000419597199652344, 'timestamp': '2025-09-30 23:09:44.087708', 'step': 5570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.159707', 'step': 5570, 'epoch': 3} {'type': 'loss', 'content': 0.003562366822734475, 'timestamp': '2025-09-30 23:09:44.167876', 'step': 5571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.241567', 'step': 5571, 'epoch': 3} {'type': 'loss', 'content': 0.000379448028979823, 'timestamp': '2025-09-30 23:09:44.249113', 'step': 5572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.320255', 'step': 5572, 'epoch': 3} {'type': 'loss', 'content': 0.000498569686897099, 'timestamp': '2025-09-30 23:09:44.325710', 'step': 5573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:44.395726', 'step': 5573, 'epoch': 3} {'type': 'loss', 'content': 0.03390287980437279, 'timestamp': '2025-09-30 23:09:44.404541', 'step': 5574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:44.475899', 'step': 5574, 'epoch': 3} {'type': 'loss', 'content': 0.002072882140055299, 'timestamp': '2025-09-30 23:09:44.480040', 'step': 5575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:44.547495', 'step': 5575, 'epoch': 3} {'type': 'loss', 'content': 0.0003331960760988295, 'timestamp': '2025-09-30 23:09:44.561774', 'step': 5576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.633566', 'step': 5576, 'epoch': 3} {'type': 'loss', 'content': 0.0011965977028012276, 'timestamp': '2025-09-30 23:09:44.642604', 'step': 5577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.703462', 'step': 5577, 'epoch': 3} {'type': 'loss', 'content': 0.0006197835900820792, 'timestamp': '2025-09-30 23:09:44.707316', 'step': 5578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.763030', 'step': 5578, 'epoch': 3} {'type': 'loss', 'content': 0.00021648206165991724, 'timestamp': '2025-09-30 23:09:44.773563', 'step': 5579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:44.857614', 'step': 5579, 'epoch': 3} {'type': 'loss', 'content': 0.0031152102164924145, 'timestamp': '2025-09-30 23:09:44.869757', 'step': 5580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:44.946303', 'step': 5580, 'epoch': 3} {'type': 'loss', 'content': 0.0002557257830630988, 'timestamp': '2025-09-30 23:09:44.954822', 'step': 5581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.030715', 'step': 5581, 'epoch': 3} {'type': 'loss', 'content': 0.0013694296358153224, 'timestamp': '2025-09-30 23:09:45.034770', 'step': 5582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.101553', 'step': 5582, 'epoch': 3} {'type': 'loss', 'content': 0.006910939235240221, 'timestamp': '2025-09-30 23:09:45.112352', 'step': 5583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.187612', 'step': 5583, 'epoch': 3} {'type': 'loss', 'content': 0.0004140521341469139, 'timestamp': '2025-09-30 23:09:45.201557', 'step': 5584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.263545', 'step': 5584, 'epoch': 3} {'type': 'loss', 'content': 0.00031079616746865213, 'timestamp': '2025-09-30 23:09:45.272246', 'step': 5585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.339025', 'step': 5585, 'epoch': 3} {'type': 'loss', 'content': 0.009175794199109077, 'timestamp': '2025-09-30 23:09:45.347968', 'step': 5586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:45.422443', 'step': 5586, 'epoch': 3} {'type': 'loss', 'content': 0.00020363484509289265, 'timestamp': '2025-09-30 23:09:45.431197', 'step': 5587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.505416', 'step': 5587, 'epoch': 3} {'type': 'loss', 'content': 0.003244184423238039, 'timestamp': '2025-09-30 23:09:45.512729', 'step': 5588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.573720', 'step': 5588, 'epoch': 3} {'type': 'loss', 'content': 0.021670060232281685, 'timestamp': '2025-09-30 23:09:45.583505', 'step': 5589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.651756', 'step': 5589, 'epoch': 3} {'type': 'loss', 'content': 0.0008131765061989427, 'timestamp': '2025-09-30 23:09:45.661117', 'step': 5590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.728587', 'step': 5590, 'epoch': 3} {'type': 'loss', 'content': 6.761869735782966e-05, 'timestamp': '2025-09-30 23:09:45.731956', 'step': 5591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:45.795714', 'step': 5591, 'epoch': 3} {'type': 'loss', 'content': 0.0036438736133277416, 'timestamp': '2025-09-30 23:09:45.807253', 'step': 5592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:45.887940', 'step': 5592, 'epoch': 3} {'type': 'loss', 'content': 0.0012355822836980224, 'timestamp': '2025-09-30 23:09:45.896538', 'step': 5593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:45.961749', 'step': 5593, 'epoch': 3} {'type': 'loss', 'content': 0.0005356374895200133, 'timestamp': '2025-09-30 23:09:45.965468', 'step': 5594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:46.045564', 'step': 5594, 'epoch': 3} {'type': 'loss', 'content': 7.025932427495718e-05, 'timestamp': '2025-09-30 23:09:46.055181', 'step': 5595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.118502', 'step': 5595, 'epoch': 3} {'type': 'loss', 'content': 0.0027327449060976505, 'timestamp': '2025-09-30 23:09:46.130201', 'step': 5596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.198865', 'step': 5596, 'epoch': 3} {'type': 'loss', 'content': 0.00046396712423302233, 'timestamp': '2025-09-30 23:09:46.208126', 'step': 5597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.280547', 'step': 5597, 'epoch': 3} {'type': 'loss', 'content': 0.012316464446485043, 'timestamp': '2025-09-30 23:09:46.289955', 'step': 5598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.355183', 'step': 5598, 'epoch': 3} {'type': 'loss', 'content': 0.0006664564134553075, 'timestamp': '2025-09-30 23:09:46.358478', 'step': 5599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.421125', 'step': 5599, 'epoch': 3} {'type': 'loss', 'content': 0.00012851323117502034, 'timestamp': '2025-09-30 23:09:46.427559', 'step': 5600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.480845', 'step': 5600, 'epoch': 3} {'type': 'loss', 'content': 0.03424546495079994, 'timestamp': '2025-09-30 23:09:46.487843', 'step': 5601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.551911', 'step': 5601, 'epoch': 3} {'type': 'loss', 'content': 0.0003925621567759663, 'timestamp': '2025-09-30 23:09:46.557863', 'step': 5602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.621174', 'step': 5602, 'epoch': 3} {'type': 'loss', 'content': 0.0017257474828511477, 'timestamp': '2025-09-30 23:09:46.624363', 'step': 5603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.683984', 'step': 5603, 'epoch': 3} {'type': 'loss', 'content': 0.001343251089565456, 'timestamp': '2025-09-30 23:09:46.689859', 'step': 5604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.757013', 'step': 5604, 'epoch': 3} {'type': 'loss', 'content': 0.0005259473109617829, 'timestamp': '2025-09-30 23:09:46.764083', 'step': 5605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.830190', 'step': 5605, 'epoch': 3} {'type': 'loss', 'content': 0.0026577457319945097, 'timestamp': '2025-09-30 23:09:46.837842', 'step': 5606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.907215', 'step': 5606, 'epoch': 3} {'type': 'loss', 'content': 0.00020268921798560768, 'timestamp': '2025-09-30 23:09:46.914774', 'step': 5607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:46.986225', 'step': 5607, 'epoch': 3} {'type': 'loss', 'content': 0.0011067675659433007, 'timestamp': '2025-09-30 23:09:46.992691', 'step': 5608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.062447', 'step': 5608, 'epoch': 3} {'type': 'loss', 'content': 0.016025038436055183, 'timestamp': '2025-09-30 23:09:47.066975', 'step': 5609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:47.151280', 'step': 5609, 'epoch': 3} {'type': 'loss', 'content': 0.008787786588072777, 'timestamp': '2025-09-30 23:09:47.162154', 'step': 5610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.230773', 'step': 5610, 'epoch': 3} {'type': 'loss', 'content': 0.0005451317992992699, 'timestamp': '2025-09-30 23:09:47.241202', 'step': 5611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.319817', 'step': 5611, 'epoch': 3} {'type': 'loss', 'content': 0.00083768559852615, 'timestamp': '2025-09-30 23:09:47.333650', 'step': 5612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:47.409111', 'step': 5612, 'epoch': 3} {'type': 'loss', 'content': 0.00020138350373599678, 'timestamp': '2025-09-30 23:09:47.413212', 'step': 5613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.490602', 'step': 5613, 'epoch': 3} {'type': 'loss', 'content': 0.00393763417378068, 'timestamp': '2025-09-30 23:09:47.494694', 'step': 5614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.569484', 'step': 5614, 'epoch': 3} {'type': 'loss', 'content': 5.206792047829367e-05, 'timestamp': '2025-09-30 23:09:47.579210', 'step': 5615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.653715', 'step': 5615, 'epoch': 3} {'type': 'loss', 'content': 0.0023539518006145954, 'timestamp': '2025-09-30 23:09:47.665162', 'step': 5616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.737939', 'step': 5616, 'epoch': 3} {'type': 'loss', 'content': 0.01639745756983757, 'timestamp': '2025-09-30 23:09:47.746042', 'step': 5617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.817405', 'step': 5617, 'epoch': 3} {'type': 'loss', 'content': 7.451620331266895e-05, 'timestamp': '2025-09-30 23:09:47.822362', 'step': 5618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.889798', 'step': 5618, 'epoch': 3} {'type': 'loss', 'content': 0.000537246698513627, 'timestamp': '2025-09-30 23:09:47.894352', 'step': 5619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:47.959927', 'step': 5619, 'epoch': 3} {'type': 'loss', 'content': 0.07266023010015488, 'timestamp': '2025-09-30 23:09:47.972767', 'step': 5620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:48.042685', 'step': 5620, 'epoch': 3} {'type': 'loss', 'content': 0.0003269172739237547, 'timestamp': '2025-09-30 23:09:48.050618', 'step': 5621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:48.116810', 'step': 5621, 'epoch': 3} {'type': 'loss', 'content': 0.0045699370093643665, 'timestamp': '2025-09-30 23:09:48.122044', 'step': 5622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:48.188389', 'step': 5622, 'epoch': 3} {'type': 'loss', 'content': 0.030496487393975258, 'timestamp': '2025-09-30 23:09:48.197682', 'step': 5623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:48.261742', 'step': 5623, 'epoch': 3} {'type': 'loss', 'content': 0.0007362211472354829, 'timestamp': '2025-09-30 23:09:48.267850', 'step': 5624, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:09:53.191585', 'step': 5624, 'epoch': 3} {'type': 'pplx', 'content': 9529963.961575134, 'timestamp': '2025-09-30 23:09:53.199577', 'step': 5624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.256578', 'step': 5624, 'epoch': 3} {'type': 'loss', 'content': 4.705970786744729e-05, 'timestamp': '2025-09-30 23:09:53.260601', 'step': 5625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.337865', 'step': 5625, 'epoch': 3} {'type': 'loss', 'content': 0.00022046695812605321, 'timestamp': '2025-09-30 23:09:53.348085', 'step': 5626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.421127', 'step': 5626, 'epoch': 3} {'type': 'loss', 'content': 0.016604971140623093, 'timestamp': '2025-09-30 23:09:53.431151', 'step': 5627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.505366', 'step': 5627, 'epoch': 3} {'type': 'loss', 'content': 0.0007136912317946553, 'timestamp': '2025-09-30 23:09:53.518490', 'step': 5628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.591755', 'step': 5628, 'epoch': 3} {'type': 'loss', 'content': 0.0008012365433387458, 'timestamp': '2025-09-30 23:09:53.597664', 'step': 5629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.667472', 'step': 5629, 'epoch': 3} {'type': 'loss', 'content': 0.03394529968500137, 'timestamp': '2025-09-30 23:09:53.674830', 'step': 5630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.750021', 'step': 5630, 'epoch': 3} {'type': 'loss', 'content': 0.0002256725128972903, 'timestamp': '2025-09-30 23:09:53.757410', 'step': 5631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.835941', 'step': 5631, 'epoch': 3} {'type': 'loss', 'content': 0.0008900674292817712, 'timestamp': '2025-09-30 23:09:53.846866', 'step': 5632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:53.912529', 'step': 5632, 'epoch': 3} {'type': 'loss', 'content': 0.00018793938215821981, 'timestamp': '2025-09-30 23:09:53.924389', 'step': 5633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.008558', 'step': 5633, 'epoch': 3} {'type': 'loss', 'content': 0.004591318313032389, 'timestamp': '2025-09-30 23:09:54.022948', 'step': 5634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:54.089239', 'step': 5634, 'epoch': 3} {'type': 'loss', 'content': 0.0049209147691726685, 'timestamp': '2025-09-30 23:09:54.093328', 'step': 5635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.170071', 'step': 5635, 'epoch': 3} {'type': 'loss', 'content': 0.0041635590605437756, 'timestamp': '2025-09-30 23:09:54.183239', 'step': 5636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.245999', 'step': 5636, 'epoch': 3} {'type': 'loss', 'content': 0.0002477086381986737, 'timestamp': '2025-09-30 23:09:54.253913', 'step': 5637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.326294', 'step': 5637, 'epoch': 3} {'type': 'loss', 'content': 0.003953159786760807, 'timestamp': '2025-09-30 23:09:54.333729', 'step': 5638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.404422', 'step': 5638, 'epoch': 3} {'type': 'loss', 'content': 8.260266622528434e-05, 'timestamp': '2025-09-30 23:09:54.407651', 'step': 5639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.478015', 'step': 5639, 'epoch': 3} {'type': 'loss', 'content': 0.008454188704490662, 'timestamp': '2025-09-30 23:09:54.488068', 'step': 5640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.555046', 'step': 5640, 'epoch': 3} {'type': 'loss', 'content': 0.0016734031960368156, 'timestamp': '2025-09-30 23:09:54.562138', 'step': 5641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.625951', 'step': 5641, 'epoch': 3} {'type': 'loss', 'content': 0.00012867840996477753, 'timestamp': '2025-09-30 23:09:54.631812', 'step': 5642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.702317', 'step': 5642, 'epoch': 3} {'type': 'loss', 'content': 9.358602983411402e-05, 'timestamp': '2025-09-30 23:09:54.705278', 'step': 5643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.766742', 'step': 5643, 'epoch': 3} {'type': 'loss', 'content': 0.0005096035310998559, 'timestamp': '2025-09-30 23:09:54.777126', 'step': 5644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.842505', 'step': 5644, 'epoch': 3} {'type': 'loss', 'content': 9.022466110764071e-05, 'timestamp': '2025-09-30 23:09:54.848603', 'step': 5645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.917414', 'step': 5645, 'epoch': 3} {'type': 'loss', 'content': 0.00014366039249580353, 'timestamp': '2025-09-30 23:09:54.925794', 'step': 5646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:54.995435', 'step': 5646, 'epoch': 3} {'type': 'loss', 'content': 0.008632487617433071, 'timestamp': '2025-09-30 23:09:55.003374', 'step': 5647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.081275', 'step': 5647, 'epoch': 3} {'type': 'loss', 'content': 0.0010955912293866277, 'timestamp': '2025-09-30 23:09:55.093780', 'step': 5648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.163215', 'step': 5648, 'epoch': 3} {'type': 'loss', 'content': 0.001206996850669384, 'timestamp': '2025-09-30 23:09:55.179775', 'step': 5649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:55.261284', 'step': 5649, 'epoch': 3} {'type': 'loss', 'content': 0.00022179403458721936, 'timestamp': '2025-09-30 23:09:55.265413', 'step': 5650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.340955', 'step': 5650, 'epoch': 3} {'type': 'loss', 'content': 0.06082023307681084, 'timestamp': '2025-09-30 23:09:55.353755', 'step': 5651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.429362', 'step': 5651, 'epoch': 3} {'type': 'loss', 'content': 0.019516844302415848, 'timestamp': '2025-09-30 23:09:55.446607', 'step': 5652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.520279', 'step': 5652, 'epoch': 3} {'type': 'loss', 'content': 0.000925022060982883, 'timestamp': '2025-09-30 23:09:55.524007', 'step': 5653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:55.587108', 'step': 5653, 'epoch': 3} {'type': 'loss', 'content': 0.0019284609006717801, 'timestamp': '2025-09-30 23:09:55.590889', 'step': 5654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.664753', 'step': 5654, 'epoch': 3} {'type': 'loss', 'content': 0.004257732070982456, 'timestamp': '2025-09-30 23:09:55.675032', 'step': 5655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.739883', 'step': 5655, 'epoch': 3} {'type': 'loss', 'content': 5.885020800633356e-05, 'timestamp': '2025-09-30 23:09:55.747298', 'step': 5656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.823188', 'step': 5656, 'epoch': 3} {'type': 'loss', 'content': 0.00011346056999173015, 'timestamp': '2025-09-30 23:09:55.833347', 'step': 5657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.911685', 'step': 5657, 'epoch': 3} {'type': 'loss', 'content': 0.005323677323758602, 'timestamp': '2025-09-30 23:09:55.920541', 'step': 5658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:55.993307', 'step': 5658, 'epoch': 3} {'type': 'loss', 'content': 0.0012722992105409503, 'timestamp': '2025-09-30 23:09:55.996873', 'step': 5659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.066351', 'step': 5659, 'epoch': 3} {'type': 'loss', 'content': 0.0034749582409858704, 'timestamp': '2025-09-30 23:09:56.079271', 'step': 5660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.154720', 'step': 5660, 'epoch': 3} {'type': 'loss', 'content': 0.03170279785990715, 'timestamp': '2025-09-30 23:09:56.163833', 'step': 5661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.233168', 'step': 5661, 'epoch': 3} {'type': 'loss', 'content': 0.0002781000512186438, 'timestamp': '2025-09-30 23:09:56.236520', 'step': 5662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.293010', 'step': 5662, 'epoch': 3} {'type': 'loss', 'content': 0.0008951654308475554, 'timestamp': '2025-09-30 23:09:56.300407', 'step': 5663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:56.375830', 'step': 5663, 'epoch': 3} {'type': 'loss', 'content': 0.017056846991181374, 'timestamp': '2025-09-30 23:09:56.384548', 'step': 5664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:56.456772', 'step': 5664, 'epoch': 3} {'type': 'loss', 'content': 0.035124026238918304, 'timestamp': '2025-09-30 23:09:56.463639', 'step': 5665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.532437', 'step': 5665, 'epoch': 3} {'type': 'loss', 'content': 0.0047279358841478825, 'timestamp': '2025-09-30 23:09:56.539148', 'step': 5666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.609388', 'step': 5666, 'epoch': 3} {'type': 'loss', 'content': 0.00010726969048846513, 'timestamp': '2025-09-30 23:09:56.616201', 'step': 5667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.681579', 'step': 5667, 'epoch': 3} {'type': 'loss', 'content': 0.0001427569950465113, 'timestamp': '2025-09-30 23:09:56.691126', 'step': 5668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:56.755164', 'step': 5668, 'epoch': 3} {'type': 'loss', 'content': 0.003974513616412878, 'timestamp': '2025-09-30 23:09:56.766076', 'step': 5669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:56.846757', 'step': 5669, 'epoch': 3} {'type': 'loss', 'content': 0.013287270441651344, 'timestamp': '2025-09-30 23:09:56.857668', 'step': 5670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:56.928936', 'step': 5670, 'epoch': 3} {'type': 'loss', 'content': 0.0026166525203734636, 'timestamp': '2025-09-30 23:09:56.933780', 'step': 5671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.006599', 'step': 5671, 'epoch': 3} {'type': 'loss', 'content': 0.0011677331058308482, 'timestamp': '2025-09-30 23:09:57.013771', 'step': 5672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.076226', 'step': 5672, 'epoch': 3} {'type': 'loss', 'content': 0.0001260965218534693, 'timestamp': '2025-09-30 23:09:57.079948', 'step': 5673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.138918', 'step': 5673, 'epoch': 3} {'type': 'loss', 'content': 5.5532960686832666e-05, 'timestamp': '2025-09-30 23:09:57.146127', 'step': 5674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:57.218423', 'step': 5674, 'epoch': 3} {'type': 'loss', 'content': 0.0027924731839448214, 'timestamp': '2025-09-30 23:09:57.225085', 'step': 5675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.292108', 'step': 5675, 'epoch': 3} {'type': 'loss', 'content': 0.011905943043529987, 'timestamp': '2025-09-30 23:09:57.303494', 'step': 5676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.373468', 'step': 5676, 'epoch': 3} {'type': 'loss', 'content': 0.00039339601062238216, 'timestamp': '2025-09-30 23:09:57.382063', 'step': 5677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:57.460997', 'step': 5677, 'epoch': 3} {'type': 'loss', 'content': 0.005872097332030535, 'timestamp': '2025-09-30 23:09:57.466085', 'step': 5678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.529733', 'step': 5678, 'epoch': 3} {'type': 'loss', 'content': 0.0013953407760709524, 'timestamp': '2025-09-30 23:09:57.534637', 'step': 5679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.605067', 'step': 5679, 'epoch': 3} {'type': 'loss', 'content': 0.0476289801299572, 'timestamp': '2025-09-30 23:09:57.618494', 'step': 5680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.693941', 'step': 5680, 'epoch': 3} {'type': 'loss', 'content': 0.020090188831090927, 'timestamp': '2025-09-30 23:09:57.699836', 'step': 5681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.770637', 'step': 5681, 'epoch': 3} {'type': 'loss', 'content': 0.0002556064573582262, 'timestamp': '2025-09-30 23:09:57.784582', 'step': 5682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.862148', 'step': 5682, 'epoch': 3} {'type': 'loss', 'content': 0.00010827561345649883, 'timestamp': '2025-09-30 23:09:57.866155', 'step': 5683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:57.932798', 'step': 5683, 'epoch': 3} {'type': 'loss', 'content': 0.004980423953384161, 'timestamp': '2025-09-30 23:09:57.941489', 'step': 5684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.008444', 'step': 5684, 'epoch': 3} {'type': 'loss', 'content': 0.0013119138311594725, 'timestamp': '2025-09-30 23:09:58.014885', 'step': 5685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.080088', 'step': 5685, 'epoch': 3} {'type': 'loss', 'content': 0.0035246561747044325, 'timestamp': '2025-09-30 23:09:58.088770', 'step': 5686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.152060', 'step': 5686, 'epoch': 3} {'type': 'loss', 'content': 9.915219561662525e-05, 'timestamp': '2025-09-30 23:09:58.155533', 'step': 5687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:58.214593', 'step': 5687, 'epoch': 3} {'type': 'loss', 'content': 0.0005269704852253199, 'timestamp': '2025-09-30 23:09:58.225471', 'step': 5688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.284743', 'step': 5688, 'epoch': 3} {'type': 'loss', 'content': 0.014953137375414371, 'timestamp': '2025-09-30 23:09:58.288011', 'step': 5689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.357050', 'step': 5689, 'epoch': 3} {'type': 'loss', 'content': 0.000872491451445967, 'timestamp': '2025-09-30 23:09:58.364476', 'step': 5690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.424302', 'step': 5690, 'epoch': 3} {'type': 'loss', 'content': 0.0004787605721503496, 'timestamp': '2025-09-30 23:09:58.435385', 'step': 5691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.500382', 'step': 5691, 'epoch': 3} {'type': 'loss', 'content': 0.00047003565123304725, 'timestamp': '2025-09-30 23:09:58.506574', 'step': 5692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.573418', 'step': 5692, 'epoch': 3} {'type': 'loss', 'content': 0.0018744472181424499, 'timestamp': '2025-09-30 23:09:58.577062', 'step': 5693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:58.654323', 'step': 5693, 'epoch': 3} {'type': 'loss', 'content': 0.010268723592162132, 'timestamp': '2025-09-30 23:09:58.664048', 'step': 5694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.740341', 'step': 5694, 'epoch': 3} {'type': 'loss', 'content': 3.77527394448407e-05, 'timestamp': '2025-09-30 23:09:58.744737', 'step': 5695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.820438', 'step': 5695, 'epoch': 3} {'type': 'loss', 'content': 0.001767016015946865, 'timestamp': '2025-09-30 23:09:58.837588', 'step': 5696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:58.912621', 'step': 5696, 'epoch': 3} {'type': 'loss', 'content': 0.015869397670030594, 'timestamp': '2025-09-30 23:09:58.923508', 'step': 5697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:59.001768', 'step': 5697, 'epoch': 3} {'type': 'loss', 'content': 0.0025040514301508665, 'timestamp': '2025-09-30 23:09:59.005464', 'step': 5698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.075395', 'step': 5698, 'epoch': 3} {'type': 'loss', 'content': 0.005718144588172436, 'timestamp': '2025-09-30 23:09:59.079841', 'step': 5699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:59.143199', 'step': 5699, 'epoch': 3} {'type': 'loss', 'content': 0.0004543671093415469, 'timestamp': '2025-09-30 23:09:59.157015', 'step': 5700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:09:59.232542', 'step': 5700, 'epoch': 3} {'type': 'loss', 'content': 0.0023978441022336483, 'timestamp': '2025-09-30 23:09:59.242488', 'step': 5701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.306476', 'step': 5701, 'epoch': 3} {'type': 'loss', 'content': 0.0036657308228313923, 'timestamp': '2025-09-30 23:09:59.310071', 'step': 5702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.383030', 'step': 5702, 'epoch': 3} {'type': 'loss', 'content': 0.07171111553907394, 'timestamp': '2025-09-30 23:09:59.386332', 'step': 5703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:59.470488', 'step': 5703, 'epoch': 3} {'type': 'loss', 'content': 0.003119672881439328, 'timestamp': '2025-09-30 23:09:59.480990', 'step': 5704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.549523', 'step': 5704, 'epoch': 3} {'type': 'loss', 'content': 0.05778180807828903, 'timestamp': '2025-09-30 23:09:59.556372', 'step': 5705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.617663', 'step': 5705, 'epoch': 3} {'type': 'loss', 'content': 0.0004521408991422504, 'timestamp': '2025-09-30 23:09:59.626328', 'step': 5706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.696929', 'step': 5706, 'epoch': 3} {'type': 'loss', 'content': 0.0023479261435568333, 'timestamp': '2025-09-30 23:09:59.700920', 'step': 5707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.766330', 'step': 5707, 'epoch': 3} {'type': 'loss', 'content': 0.0010598140070214868, 'timestamp': '2025-09-30 23:09:59.773774', 'step': 5708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.830766', 'step': 5708, 'epoch': 3} {'type': 'loss', 'content': 0.0012063082540407777, 'timestamp': '2025-09-30 23:09:59.834220', 'step': 5709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:09:59.893501', 'step': 5709, 'epoch': 3} {'type': 'loss', 'content': 0.0026209966745227575, 'timestamp': '2025-09-30 23:09:59.901233', 'step': 5710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:09:59.972086', 'step': 5710, 'epoch': 3} {'type': 'loss', 'content': 0.0001578307565068826, 'timestamp': '2025-09-30 23:09:59.979268', 'step': 5711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.045163', 'step': 5711, 'epoch': 3} {'type': 'loss', 'content': 0.0024753098841756582, 'timestamp': '2025-09-30 23:10:00.051874', 'step': 5712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.115631', 'step': 5712, 'epoch': 3} {'type': 'loss', 'content': 0.0040986970998346806, 'timestamp': '2025-09-30 23:10:00.118926', 'step': 5713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.185348', 'step': 5713, 'epoch': 3} {'type': 'loss', 'content': 0.0001858026225818321, 'timestamp': '2025-09-30 23:10:00.191823', 'step': 5714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.256680', 'step': 5714, 'epoch': 3} {'type': 'loss', 'content': 0.0006992816342972219, 'timestamp': '2025-09-30 23:10:00.266035', 'step': 5715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.338785', 'step': 5715, 'epoch': 3} {'type': 'loss', 'content': 0.0006344888242892921, 'timestamp': '2025-09-30 23:10:00.350485', 'step': 5716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:10:00.423046', 'step': 5716, 'epoch': 3} {'type': 'loss', 'content': 0.0006210081046447158, 'timestamp': '2025-09-30 23:10:00.433298', 'step': 5717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.505275', 'step': 5717, 'epoch': 3} {'type': 'loss', 'content': 0.007757385261356831, 'timestamp': '2025-09-30 23:10:00.510179', 'step': 5718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.581164', 'step': 5718, 'epoch': 3} {'type': 'loss', 'content': 0.013246892020106316, 'timestamp': '2025-09-30 23:10:00.592479', 'step': 5719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.672612', 'step': 5719, 'epoch': 3} {'type': 'loss', 'content': 0.0008015672792680562, 'timestamp': '2025-09-30 23:10:00.685211', 'step': 5720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.767569', 'step': 5720, 'epoch': 3} {'type': 'loss', 'content': 0.0005958712426945567, 'timestamp': '2025-09-30 23:10:00.778747', 'step': 5721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.847119', 'step': 5721, 'epoch': 3} {'type': 'loss', 'content': 0.001719062332995236, 'timestamp': '2025-09-30 23:10:00.850741', 'step': 5722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.907966', 'step': 5722, 'epoch': 3} {'type': 'loss', 'content': 0.00025416340213268995, 'timestamp': '2025-09-30 23:10:00.912949', 'step': 5723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:00.970147', 'step': 5723, 'epoch': 3} {'type': 'loss', 'content': 0.0034684224519878626, 'timestamp': '2025-09-30 23:10:00.977971', 'step': 5724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:01.038003', 'step': 5724, 'epoch': 3} {'type': 'loss', 'content': 0.00032631869544275105, 'timestamp': '2025-09-30 23:10:01.040719', 'step': 5725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.100646', 'step': 5725, 'epoch': 3} {'type': 'loss', 'content': 0.0009221804211847484, 'timestamp': '2025-09-30 23:10:01.104599', 'step': 5726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.163016', 'step': 5726, 'epoch': 3} {'type': 'loss', 'content': 0.00016688792675267905, 'timestamp': '2025-09-30 23:10:01.166352', 'step': 5727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.223284', 'step': 5727, 'epoch': 3} {'type': 'loss', 'content': 0.0008578127017244697, 'timestamp': '2025-09-30 23:10:01.229870', 'step': 5728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:01.288326', 'step': 5728, 'epoch': 3} {'type': 'loss', 'content': 0.0006730156601406634, 'timestamp': '2025-09-30 23:10:01.291723', 'step': 5729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.350724', 'step': 5729, 'epoch': 3} {'type': 'loss', 'content': 0.03452383354306221, 'timestamp': '2025-09-30 23:10:01.358173', 'step': 5730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.424715', 'step': 5730, 'epoch': 3} {'type': 'loss', 'content': 0.014854836277663708, 'timestamp': '2025-09-30 23:10:01.433713', 'step': 5731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:01.501304', 'step': 5731, 'epoch': 3} {'type': 'loss', 'content': 0.0023293644189834595, 'timestamp': '2025-09-30 23:10:01.511715', 'step': 5732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.582799', 'step': 5732, 'epoch': 3} {'type': 'loss', 'content': 0.0019261975539848208, 'timestamp': '2025-09-30 23:10:01.592785', 'step': 5733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.676211', 'step': 5733, 'epoch': 3} {'type': 'loss', 'content': 0.00042709274566732347, 'timestamp': '2025-09-30 23:10:01.686293', 'step': 5734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:01.750963', 'step': 5734, 'epoch': 3} {'type': 'loss', 'content': 0.005156518425792456, 'timestamp': '2025-09-30 23:10:01.758866', 'step': 5735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.822709', 'step': 5735, 'epoch': 3} {'type': 'loss', 'content': 0.0025187283754348755, 'timestamp': '2025-09-30 23:10:01.830913', 'step': 5736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.894606', 'step': 5736, 'epoch': 3} {'type': 'loss', 'content': 0.0006634584860876203, 'timestamp': '2025-09-30 23:10:01.897226', 'step': 5737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:01.971709', 'step': 5737, 'epoch': 3} {'type': 'loss', 'content': 0.0005597719573415816, 'timestamp': '2025-09-30 23:10:01.974456', 'step': 5738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.030398', 'step': 5738, 'epoch': 3} {'type': 'loss', 'content': 0.0011177592677995563, 'timestamp': '2025-09-30 23:10:02.035690', 'step': 5739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.091606', 'step': 5739, 'epoch': 3} {'type': 'loss', 'content': 0.005312207620590925, 'timestamp': '2025-09-30 23:10:02.099959', 'step': 5740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.156010', 'step': 5740, 'epoch': 3} {'type': 'loss', 'content': 0.006881732493638992, 'timestamp': '2025-09-30 23:10:02.158480', 'step': 5741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.219468', 'step': 5741, 'epoch': 3} {'type': 'loss', 'content': 0.010021298192441463, 'timestamp': '2025-09-30 23:10:02.225068', 'step': 5742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.285766', 'step': 5742, 'epoch': 3} {'type': 'loss', 'content': 0.001115302904509008, 'timestamp': '2025-09-30 23:10:02.291427', 'step': 5743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.346637', 'step': 5743, 'epoch': 3} {'type': 'loss', 'content': 0.0003187943366356194, 'timestamp': '2025-09-30 23:10:02.358921', 'step': 5744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:02.418814', 'step': 5744, 'epoch': 3} {'type': 'loss', 'content': 0.0006186029058881104, 'timestamp': '2025-09-30 23:10:02.425469', 'step': 5745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.483768', 'step': 5745, 'epoch': 3} {'type': 'loss', 'content': 0.001614944078028202, 'timestamp': '2025-09-30 23:10:02.493482', 'step': 5746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.569376', 'step': 5746, 'epoch': 3} {'type': 'loss', 'content': 0.0008371178410016, 'timestamp': '2025-09-30 23:10:02.578567', 'step': 5747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.654648', 'step': 5747, 'epoch': 3} {'type': 'loss', 'content': 0.0036170019302517176, 'timestamp': '2025-09-30 23:10:02.666984', 'step': 5748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:02.734386', 'step': 5748, 'epoch': 3} {'type': 'loss', 'content': 0.05434458702802658, 'timestamp': '2025-09-30 23:10:02.741087', 'step': 5749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.800654', 'step': 5749, 'epoch': 3} {'type': 'loss', 'content': 0.0012942457105964422, 'timestamp': '2025-09-30 23:10:02.810492', 'step': 5750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.878409', 'step': 5750, 'epoch': 3} {'type': 'loss', 'content': 0.0012805291917175055, 'timestamp': '2025-09-30 23:10:02.886662', 'step': 5751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:02.942119', 'step': 5751, 'epoch': 3} {'type': 'loss', 'content': 0.008227965794503689, 'timestamp': '2025-09-30 23:10:02.950566', 'step': 5752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.008834', 'step': 5752, 'epoch': 3} {'type': 'loss', 'content': 0.00029317030566744506, 'timestamp': '2025-09-30 23:10:03.014402', 'step': 5753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.075909', 'step': 5753, 'epoch': 3} {'type': 'loss', 'content': 0.005111422855407, 'timestamp': '2025-09-30 23:10:03.082829', 'step': 5754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.146526', 'step': 5754, 'epoch': 3} {'type': 'loss', 'content': 0.0004545070987660438, 'timestamp': '2025-09-30 23:10:03.149020', 'step': 5755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.209104', 'step': 5755, 'epoch': 3} {'type': 'loss', 'content': 0.0005474287318065763, 'timestamp': '2025-09-30 23:10:03.215699', 'step': 5756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.271643', 'step': 5756, 'epoch': 3} {'type': 'loss', 'content': 0.0010161226382479072, 'timestamp': '2025-09-30 23:10:03.274891', 'step': 5757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.329192', 'step': 5757, 'epoch': 3} {'type': 'loss', 'content': 0.00035370580735616386, 'timestamp': '2025-09-30 23:10:03.332778', 'step': 5758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.392136', 'step': 5758, 'epoch': 3} {'type': 'loss', 'content': 0.002041599480435252, 'timestamp': '2025-09-30 23:10:03.398388', 'step': 5759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.454100', 'step': 5759, 'epoch': 3} {'type': 'loss', 'content': 0.0411565899848938, 'timestamp': '2025-09-30 23:10:03.466516', 'step': 5760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.526526', 'step': 5760, 'epoch': 3} {'type': 'loss', 'content': 0.027365049347281456, 'timestamp': '2025-09-30 23:10:03.534483', 'step': 5761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.607188', 'step': 5761, 'epoch': 3} {'type': 'loss', 'content': 0.00039360453956760466, 'timestamp': '2025-09-30 23:10:03.609707', 'step': 5762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 23:10:03.676067', 'step': 5762, 'epoch': 3} {'type': 'loss', 'content': 0.00101150490809232, 'timestamp': '2025-09-30 23:10:03.684103', 'step': 5763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.753361', 'step': 5763, 'epoch': 3} {'type': 'loss', 'content': 0.0016104790847748518, 'timestamp': '2025-09-30 23:10:03.759743', 'step': 5764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.819174', 'step': 5764, 'epoch': 3} {'type': 'loss', 'content': 0.00034226555726490915, 'timestamp': '2025-09-30 23:10:03.825239', 'step': 5765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.884402', 'step': 5765, 'epoch': 3} {'type': 'loss', 'content': 0.005323235411196947, 'timestamp': '2025-09-30 23:10:03.888939', 'step': 5766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:03.944698', 'step': 5766, 'epoch': 3} {'type': 'loss', 'content': 0.07308884710073471, 'timestamp': '2025-09-30 23:10:03.949405', 'step': 5767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.008918', 'step': 5767, 'epoch': 3} {'type': 'loss', 'content': 0.0001970163721125573, 'timestamp': '2025-09-30 23:10:04.016292', 'step': 5768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.071741', 'step': 5768, 'epoch': 3} {'type': 'loss', 'content': 0.0004101812082808465, 'timestamp': '2025-09-30 23:10:04.075162', 'step': 5769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:04.131698', 'step': 5769, 'epoch': 3} {'type': 'loss', 'content': 0.02675783820450306, 'timestamp': '2025-09-30 23:10:04.134896', 'step': 5770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.191498', 'step': 5770, 'epoch': 3} {'type': 'loss', 'content': 0.05948595330119133, 'timestamp': '2025-09-30 23:10:04.196478', 'step': 5771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.253150', 'step': 5771, 'epoch': 3} {'type': 'loss', 'content': 0.00048231513937935233, 'timestamp': '2025-09-30 23:10:04.262383', 'step': 5772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.327112', 'step': 5772, 'epoch': 3} {'type': 'loss', 'content': 0.0019822276663035154, 'timestamp': '2025-09-30 23:10:04.331728', 'step': 5773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.401979', 'step': 5773, 'epoch': 3} {'type': 'loss', 'content': 0.00022868650557938963, 'timestamp': '2025-09-30 23:10:04.406617', 'step': 5774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.466094', 'step': 5774, 'epoch': 3} {'type': 'loss', 'content': 0.001584779703989625, 'timestamp': '2025-09-30 23:10:04.469766', 'step': 5775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:04.532853', 'step': 5775, 'epoch': 3} {'type': 'loss', 'content': 0.0005238546873442829, 'timestamp': '2025-09-30 23:10:04.543958', 'step': 5776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:10:08.875303', 'step': 5776, 'epoch': 3} {'type': 'pplx', 'content': 8075527.90891345, 'timestamp': '2025-09-30 23:10:08.878772', 'step': 5776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:08.932663', 'step': 5776, 'epoch': 3} {'type': 'loss', 'content': 0.004728293512016535, 'timestamp': '2025-09-30 23:10:08.936211', 'step': 5777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:08.997635', 'step': 5777, 'epoch': 3} {'type': 'loss', 'content': 0.004891710821539164, 'timestamp': '2025-09-30 23:10:08.999882', 'step': 5778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.054324', 'step': 5778, 'epoch': 3} {'type': 'loss', 'content': 0.014514570124447346, 'timestamp': '2025-09-30 23:10:09.057399', 'step': 5779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.115440', 'step': 5779, 'epoch': 3} {'type': 'loss', 'content': 0.0008286833763122559, 'timestamp': '2025-09-30 23:10:09.122199', 'step': 5780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.186422', 'step': 5780, 'epoch': 3} {'type': 'loss', 'content': 0.017195796594023705, 'timestamp': '2025-09-30 23:10:09.194562', 'step': 5781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.257308', 'step': 5781, 'epoch': 3} {'type': 'loss', 'content': 0.005297549068927765, 'timestamp': '2025-09-30 23:10:09.263457', 'step': 5782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:09.324372', 'step': 5782, 'epoch': 3} {'type': 'loss', 'content': 0.0005169693613424897, 'timestamp': '2025-09-30 23:10:09.327215', 'step': 5783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.381780', 'step': 5783, 'epoch': 3} {'type': 'loss', 'content': 0.0004100517835468054, 'timestamp': '2025-09-30 23:10:09.387970', 'step': 5784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.443195', 'step': 5784, 'epoch': 3} {'type': 'loss', 'content': 0.0063618822023272514, 'timestamp': '2025-09-30 23:10:09.445823', 'step': 5785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.503492', 'step': 5785, 'epoch': 3} {'type': 'loss', 'content': 0.008906678296625614, 'timestamp': '2025-09-30 23:10:09.505832', 'step': 5786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:09.559212', 'step': 5786, 'epoch': 3} {'type': 'loss', 'content': 0.0010922211222350597, 'timestamp': '2025-09-30 23:10:09.562025', 'step': 5787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:09.618647', 'step': 5787, 'epoch': 3} {'type': 'loss', 'content': 0.012080796994268894, 'timestamp': '2025-09-30 23:10:09.626847', 'step': 5788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.689227', 'step': 5788, 'epoch': 3} {'type': 'loss', 'content': 0.0014770905254408717, 'timestamp': '2025-09-30 23:10:09.697686', 'step': 5789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.751912', 'step': 5789, 'epoch': 3} {'type': 'loss', 'content': 0.0004573843034449965, 'timestamp': '2025-09-30 23:10:09.756131', 'step': 5790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.812841', 'step': 5790, 'epoch': 3} {'type': 'loss', 'content': 0.0001605425786692649, 'timestamp': '2025-09-30 23:10:09.818569', 'step': 5791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.892958', 'step': 5791, 'epoch': 3} {'type': 'loss', 'content': 0.0009633034351281822, 'timestamp': '2025-09-30 23:10:09.899024', 'step': 5792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:09.954855', 'step': 5792, 'epoch': 3} {'type': 'loss', 'content': 0.011867056600749493, 'timestamp': '2025-09-30 23:10:09.957378', 'step': 5793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.011823', 'step': 5793, 'epoch': 3} {'type': 'loss', 'content': 0.0014262201730161905, 'timestamp': '2025-09-30 23:10:10.014213', 'step': 5794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.070166', 'step': 5794, 'epoch': 3} {'type': 'loss', 'content': 0.007011496927589178, 'timestamp': '2025-09-30 23:10:10.072301', 'step': 5795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:10.125283', 'step': 5795, 'epoch': 3} {'type': 'loss', 'content': 0.0018721792148426175, 'timestamp': '2025-09-30 23:10:10.131227', 'step': 5796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:10.183249', 'step': 5796, 'epoch': 3} {'type': 'loss', 'content': 0.01689991168677807, 'timestamp': '2025-09-30 23:10:10.185734', 'step': 5797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.239781', 'step': 5797, 'epoch': 3} {'type': 'loss', 'content': 0.0008281556656584144, 'timestamp': '2025-09-30 23:10:10.242497', 'step': 5798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.297924', 'step': 5798, 'epoch': 3} {'type': 'loss', 'content': 0.000965401588473469, 'timestamp': '2025-09-30 23:10:10.300223', 'step': 5799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.353579', 'step': 5799, 'epoch': 3} {'type': 'loss', 'content': 0.0036652605049312115, 'timestamp': '2025-09-30 23:10:10.359551', 'step': 5800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.412800', 'step': 5800, 'epoch': 3} {'type': 'loss', 'content': 0.055599987506866455, 'timestamp': '2025-09-30 23:10:10.415669', 'step': 5801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.469731', 'step': 5801, 'epoch': 3} {'type': 'loss', 'content': 0.030731458216905594, 'timestamp': '2025-09-30 23:10:10.475990', 'step': 5802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.531592', 'step': 5802, 'epoch': 3} {'type': 'loss', 'content': 0.0004761986783705652, 'timestamp': '2025-09-30 23:10:10.533883', 'step': 5803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.587257', 'step': 5803, 'epoch': 3} {'type': 'loss', 'content': 0.018261300399899483, 'timestamp': '2025-09-30 23:10:10.597886', 'step': 5804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.664408', 'step': 5804, 'epoch': 3} {'type': 'loss', 'content': 0.007944024167954922, 'timestamp': '2025-09-30 23:10:10.670498', 'step': 5805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.732897', 'step': 5805, 'epoch': 3} {'type': 'loss', 'content': 0.00758024025708437, 'timestamp': '2025-09-30 23:10:10.736368', 'step': 5806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.794789', 'step': 5806, 'epoch': 3} {'type': 'loss', 'content': 0.030878549441695213, 'timestamp': '2025-09-30 23:10:10.798111', 'step': 5807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.852666', 'step': 5807, 'epoch': 3} {'type': 'loss', 'content': 0.002447203965857625, 'timestamp': '2025-09-30 23:10:10.861475', 'step': 5808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.922098', 'step': 5808, 'epoch': 3} {'type': 'loss', 'content': 0.0029531470499932766, 'timestamp': '2025-09-30 23:10:10.924772', 'step': 5809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:10.979928', 'step': 5809, 'epoch': 3} {'type': 'loss', 'content': 0.032319556921720505, 'timestamp': '2025-09-30 23:10:10.982764', 'step': 5810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.037714', 'step': 5810, 'epoch': 3} {'type': 'loss', 'content': 0.0011334342416375875, 'timestamp': '2025-09-30 23:10:11.040806', 'step': 5811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.095542', 'step': 5811, 'epoch': 3} {'type': 'loss', 'content': 0.002242334885522723, 'timestamp': '2025-09-30 23:10:11.102168', 'step': 5812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.156323', 'step': 5812, 'epoch': 3} {'type': 'loss', 'content': 0.0007801746833138168, 'timestamp': '2025-09-30 23:10:11.159003', 'step': 5813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.214713', 'step': 5813, 'epoch': 3} {'type': 'loss', 'content': 0.0015829401090741158, 'timestamp': '2025-09-30 23:10:11.217710', 'step': 5814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.274084', 'step': 5814, 'epoch': 3} {'type': 'loss', 'content': 0.006616558413952589, 'timestamp': '2025-09-30 23:10:11.276609', 'step': 5815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.331828', 'step': 5815, 'epoch': 3} {'type': 'loss', 'content': 0.0007156844949349761, 'timestamp': '2025-09-30 23:10:11.337885', 'step': 5816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.392448', 'step': 5816, 'epoch': 3} {'type': 'loss', 'content': 0.0025598895736038685, 'timestamp': '2025-09-30 23:10:11.395032', 'step': 5817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:11.453835', 'step': 5817, 'epoch': 3} {'type': 'loss', 'content': 0.0004017583269160241, 'timestamp': '2025-09-30 23:10:11.456059', 'step': 5818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:11.509904', 'step': 5818, 'epoch': 3} {'type': 'loss', 'content': 0.011861703358590603, 'timestamp': '2025-09-30 23:10:11.512578', 'step': 5819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:11.566764', 'step': 5819, 'epoch': 3} {'type': 'loss', 'content': 0.0027433098293840885, 'timestamp': '2025-09-30 23:10:11.573587', 'step': 5820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:11.629876', 'step': 5820, 'epoch': 3} {'type': 'loss', 'content': 0.0019072899594902992, 'timestamp': '2025-09-30 23:10:11.637796', 'step': 5821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.702906', 'step': 5821, 'epoch': 3} {'type': 'loss', 'content': 0.0008534920052625239, 'timestamp': '2025-09-30 23:10:11.711555', 'step': 5822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.781816', 'step': 5822, 'epoch': 3} {'type': 'loss', 'content': 0.041072625666856766, 'timestamp': '2025-09-30 23:10:11.785815', 'step': 5823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:11.845881', 'step': 5823, 'epoch': 3} {'type': 'loss', 'content': 0.0013884298969060183, 'timestamp': '2025-09-30 23:10:11.852122', 'step': 5824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.907256', 'step': 5824, 'epoch': 3} {'type': 'loss', 'content': 0.0005781634827144444, 'timestamp': '2025-09-30 23:10:11.910359', 'step': 5825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:11.964741', 'step': 5825, 'epoch': 3} {'type': 'loss', 'content': 0.0013005908112972975, 'timestamp': '2025-09-30 23:10:11.967392', 'step': 5826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.023425', 'step': 5826, 'epoch': 3} {'type': 'loss', 'content': 0.0011222652392461896, 'timestamp': '2025-09-30 23:10:12.026765', 'step': 5827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.080606', 'step': 5827, 'epoch': 3} {'type': 'loss', 'content': 0.0017687658546492457, 'timestamp': '2025-09-30 23:10:12.087806', 'step': 5828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.142486', 'step': 5828, 'epoch': 3} {'type': 'loss', 'content': 0.008147424086928368, 'timestamp': '2025-09-30 23:10:12.145510', 'step': 5829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.203635', 'step': 5829, 'epoch': 3} {'type': 'loss', 'content': 0.00029853967134840786, 'timestamp': '2025-09-30 23:10:12.206019', 'step': 5830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.260429', 'step': 5830, 'epoch': 3} {'type': 'loss', 'content': 0.0019437819719314575, 'timestamp': '2025-09-30 23:10:12.263285', 'step': 5831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.317096', 'step': 5831, 'epoch': 3} {'type': 'loss', 'content': 0.012570134364068508, 'timestamp': '2025-09-30 23:10:12.322968', 'step': 5832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:10:12.375356', 'step': 5832, 'epoch': 3} {'type': 'loss', 'content': 0.0005426952266134322, 'timestamp': '2025-09-30 23:10:12.378444', 'step': 5833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.438200', 'step': 5833, 'epoch': 3} {'type': 'loss', 'content': 0.002616443205624819, 'timestamp': '2025-09-30 23:10:12.440915', 'step': 5834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.494276', 'step': 5834, 'epoch': 3} {'type': 'loss', 'content': 0.000560583604965359, 'timestamp': '2025-09-30 23:10:12.497927', 'step': 5835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.553948', 'step': 5835, 'epoch': 3} {'type': 'loss', 'content': 0.026981741189956665, 'timestamp': '2025-09-30 23:10:12.560609', 'step': 5836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.613712', 'step': 5836, 'epoch': 3} {'type': 'loss', 'content': 0.04749567061662674, 'timestamp': '2025-09-30 23:10:12.616495', 'step': 5837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.675788', 'step': 5837, 'epoch': 3} {'type': 'loss', 'content': 0.0010071711149066687, 'timestamp': '2025-09-30 23:10:12.678712', 'step': 5838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:12.741611', 'step': 5838, 'epoch': 3} {'type': 'loss', 'content': 0.005111701786518097, 'timestamp': '2025-09-30 23:10:12.747118', 'step': 5839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.809686', 'step': 5839, 'epoch': 3} {'type': 'loss', 'content': 0.01009322702884674, 'timestamp': '2025-09-30 23:10:12.818269', 'step': 5840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:12.908404', 'step': 5840, 'epoch': 3} {'type': 'loss', 'content': 0.0008597162668593228, 'timestamp': '2025-09-30 23:10:12.913689', 'step': 5841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.015055', 'step': 5841, 'epoch': 3} {'type': 'loss', 'content': 0.006969036068767309, 'timestamp': '2025-09-30 23:10:13.027175', 'step': 5842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.122163', 'step': 5842, 'epoch': 3} {'type': 'loss', 'content': 0.000511590507812798, 'timestamp': '2025-09-30 23:10:13.127483', 'step': 5843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:13.198820', 'step': 5843, 'epoch': 3} {'type': 'loss', 'content': 0.0007466378738172352, 'timestamp': '2025-09-30 23:10:13.212002', 'step': 5844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.270917', 'step': 5844, 'epoch': 3} {'type': 'loss', 'content': 0.034246060997247696, 'timestamp': '2025-09-30 23:10:13.276153', 'step': 5845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.336741', 'step': 5845, 'epoch': 3} {'type': 'loss', 'content': 0.05160754546523094, 'timestamp': '2025-09-30 23:10:13.342813', 'step': 5846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.410341', 'step': 5846, 'epoch': 3} {'type': 'loss', 'content': 0.0009106699726544321, 'timestamp': '2025-09-30 23:10:13.416566', 'step': 5847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.479172', 'step': 5847, 'epoch': 3} {'type': 'loss', 'content': 0.10332673788070679, 'timestamp': '2025-09-30 23:10:13.492992', 'step': 5848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.557956', 'step': 5848, 'epoch': 3} {'type': 'loss', 'content': 0.004147292114794254, 'timestamp': '2025-09-30 23:10:13.562258', 'step': 5849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.621334', 'step': 5849, 'epoch': 3} {'type': 'loss', 'content': 0.004738672636449337, 'timestamp': '2025-09-30 23:10:13.626360', 'step': 5850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:13.690748', 'step': 5850, 'epoch': 3} {'type': 'loss', 'content': 0.0016579177463427186, 'timestamp': '2025-09-30 23:10:13.697600', 'step': 5851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.772985', 'step': 5851, 'epoch': 3} {'type': 'loss', 'content': 0.0015493531245738268, 'timestamp': '2025-09-30 23:10:13.783200', 'step': 5852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.841736', 'step': 5852, 'epoch': 3} {'type': 'loss', 'content': 0.0005471481126733124, 'timestamp': '2025-09-30 23:10:13.851735', 'step': 5853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.923935', 'step': 5853, 'epoch': 3} {'type': 'loss', 'content': 0.0019090755376964808, 'timestamp': '2025-09-30 23:10:13.930809', 'step': 5854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:13.996598', 'step': 5854, 'epoch': 3} {'type': 'loss', 'content': 0.003772052237764001, 'timestamp': '2025-09-30 23:10:14.000830', 'step': 5855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.073020', 'step': 5855, 'epoch': 3} {'type': 'loss', 'content': 0.002452939748764038, 'timestamp': '2025-09-30 23:10:14.080588', 'step': 5856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.149386', 'step': 5856, 'epoch': 3} {'type': 'loss', 'content': 0.006616051308810711, 'timestamp': '2025-09-30 23:10:14.153173', 'step': 5857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:14.209859', 'step': 5857, 'epoch': 3} {'type': 'loss', 'content': 0.0008847444551065564, 'timestamp': '2025-09-30 23:10:14.214193', 'step': 5858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.274602', 'step': 5858, 'epoch': 3} {'type': 'loss', 'content': 0.0008551134960725904, 'timestamp': '2025-09-30 23:10:14.279511', 'step': 5859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.335892', 'step': 5859, 'epoch': 3} {'type': 'loss', 'content': 0.0005422722897492349, 'timestamp': '2025-09-30 23:10:14.356099', 'step': 5860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.422357', 'step': 5860, 'epoch': 3} {'type': 'loss', 'content': 0.00496200704947114, 'timestamp': '2025-09-30 23:10:14.426404', 'step': 5861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.487633', 'step': 5861, 'epoch': 3} {'type': 'loss', 'content': 0.030150681734085083, 'timestamp': '2025-09-30 23:10:14.490737', 'step': 5862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:14.548825', 'step': 5862, 'epoch': 3} {'type': 'loss', 'content': 0.0005391962477006018, 'timestamp': '2025-09-30 23:10:14.551117', 'step': 5863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:14.606312', 'step': 5863, 'epoch': 3} {'type': 'loss', 'content': 0.0014522381825372577, 'timestamp': '2025-09-30 23:10:14.614797', 'step': 5864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.671298', 'step': 5864, 'epoch': 3} {'type': 'loss', 'content': 0.0034101116470992565, 'timestamp': '2025-09-30 23:10:14.676533', 'step': 5865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.743341', 'step': 5865, 'epoch': 3} {'type': 'loss', 'content': 0.003223618259653449, 'timestamp': '2025-09-30 23:10:14.747962', 'step': 5866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.810932', 'step': 5866, 'epoch': 3} {'type': 'loss', 'content': 0.02837921492755413, 'timestamp': '2025-09-30 23:10:14.818125', 'step': 5867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:14.883267', 'step': 5867, 'epoch': 3} {'type': 'loss', 'content': 0.0011226293863728642, 'timestamp': '2025-09-30 23:10:14.892008', 'step': 5868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:14.954806', 'step': 5868, 'epoch': 3} {'type': 'loss', 'content': 0.033233337104320526, 'timestamp': '2025-09-30 23:10:14.958715', 'step': 5869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.020578', 'step': 5869, 'epoch': 3} {'type': 'loss', 'content': 0.002229837002232671, 'timestamp': '2025-09-30 23:10:15.029926', 'step': 5870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.086732', 'step': 5870, 'epoch': 3} {'type': 'loss', 'content': 0.008199290372431278, 'timestamp': '2025-09-30 23:10:15.089871', 'step': 5871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:15.145634', 'step': 5871, 'epoch': 3} {'type': 'loss', 'content': 0.007650763727724552, 'timestamp': '2025-09-30 23:10:15.152395', 'step': 5872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.207781', 'step': 5872, 'epoch': 3} {'type': 'loss', 'content': 0.0031378958374261856, 'timestamp': '2025-09-30 23:10:15.211153', 'step': 5873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.266339', 'step': 5873, 'epoch': 3} {'type': 'loss', 'content': 0.00609446270391345, 'timestamp': '2025-09-30 23:10:15.269837', 'step': 5874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.324445', 'step': 5874, 'epoch': 3} {'type': 'loss', 'content': 0.007676268462091684, 'timestamp': '2025-09-30 23:10:15.327233', 'step': 5875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.383386', 'step': 5875, 'epoch': 3} {'type': 'loss', 'content': 0.0015330956084653735, 'timestamp': '2025-09-30 23:10:15.389736', 'step': 5876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.445695', 'step': 5876, 'epoch': 3} {'type': 'loss', 'content': 0.008332515135407448, 'timestamp': '2025-09-30 23:10:15.449550', 'step': 5877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.505857', 'step': 5877, 'epoch': 3} {'type': 'loss', 'content': 0.006022018380463123, 'timestamp': '2025-09-30 23:10:15.509401', 'step': 5878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.572143', 'step': 5878, 'epoch': 3} {'type': 'loss', 'content': 0.00450541079044342, 'timestamp': '2025-09-30 23:10:15.574927', 'step': 5879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.628467', 'step': 5879, 'epoch': 3} {'type': 'loss', 'content': 0.019293829798698425, 'timestamp': '2025-09-30 23:10:15.634581', 'step': 5880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.688276', 'step': 5880, 'epoch': 3} {'type': 'loss', 'content': 0.03078918345272541, 'timestamp': '2025-09-30 23:10:15.692791', 'step': 5881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.751184', 'step': 5881, 'epoch': 3} {'type': 'loss', 'content': 0.004477228503674269, 'timestamp': '2025-09-30 23:10:15.755089', 'step': 5882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.814751', 'step': 5882, 'epoch': 3} {'type': 'loss', 'content': 0.0261105727404356, 'timestamp': '2025-09-30 23:10:15.819346', 'step': 5883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.878364', 'step': 5883, 'epoch': 3} {'type': 'loss', 'content': 0.0026075642090290785, 'timestamp': '2025-09-30 23:10:15.885450', 'step': 5884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:15.941042', 'step': 5884, 'epoch': 3} {'type': 'loss', 'content': 0.01426472794264555, 'timestamp': '2025-09-30 23:10:15.943375', 'step': 5885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:15.998161', 'step': 5885, 'epoch': 3} {'type': 'loss', 'content': 0.0012746657012030482, 'timestamp': '2025-09-30 23:10:16.000810', 'step': 5886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.056427', 'step': 5886, 'epoch': 3} {'type': 'loss', 'content': 0.00830751657485962, 'timestamp': '2025-09-30 23:10:16.058839', 'step': 5887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.112369', 'step': 5887, 'epoch': 3} {'type': 'loss', 'content': 0.0024254764430224895, 'timestamp': '2025-09-30 23:10:16.118955', 'step': 5888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:16.172052', 'step': 5888, 'epoch': 3} {'type': 'loss', 'content': 0.023816566914319992, 'timestamp': '2025-09-30 23:10:16.174724', 'step': 5889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.228219', 'step': 5889, 'epoch': 3} {'type': 'loss', 'content': 0.0014662619214504957, 'timestamp': '2025-09-30 23:10:16.230854', 'step': 5890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.284287', 'step': 5890, 'epoch': 3} {'type': 'loss', 'content': 0.002864920301362872, 'timestamp': '2025-09-30 23:10:16.286757', 'step': 5891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:16.340574', 'step': 5891, 'epoch': 3} {'type': 'loss', 'content': 0.0010539813665673137, 'timestamp': '2025-09-30 23:10:16.346512', 'step': 5892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.398893', 'step': 5892, 'epoch': 3} {'type': 'loss', 'content': 0.009346929378807545, 'timestamp': '2025-09-30 23:10:16.401505', 'step': 5893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.456561', 'step': 5893, 'epoch': 3} {'type': 'loss', 'content': 0.041703056544065475, 'timestamp': '2025-09-30 23:10:16.458985', 'step': 5894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.512717', 'step': 5894, 'epoch': 3} {'type': 'loss', 'content': 0.019581621512770653, 'timestamp': '2025-09-30 23:10:16.515472', 'step': 5895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.568888', 'step': 5895, 'epoch': 3} {'type': 'loss', 'content': 0.008535207249224186, 'timestamp': '2025-09-30 23:10:16.575228', 'step': 5896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.628115', 'step': 5896, 'epoch': 3} {'type': 'loss', 'content': 0.00578780472278595, 'timestamp': '2025-09-30 23:10:16.630606', 'step': 5897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.683627', 'step': 5897, 'epoch': 3} {'type': 'loss', 'content': 0.003889763029292226, 'timestamp': '2025-09-30 23:10:16.686702', 'step': 5898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:16.742516', 'step': 5898, 'epoch': 3} {'type': 'loss', 'content': 0.0019726997707039118, 'timestamp': '2025-09-30 23:10:16.745246', 'step': 5899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.801918', 'step': 5899, 'epoch': 3} {'type': 'loss', 'content': 0.010740899480879307, 'timestamp': '2025-09-30 23:10:16.807930', 'step': 5900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.861522', 'step': 5900, 'epoch': 3} {'type': 'loss', 'content': 0.005162722896784544, 'timestamp': '2025-09-30 23:10:16.864638', 'step': 5901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.921779', 'step': 5901, 'epoch': 3} {'type': 'loss', 'content': 0.0020304948557168245, 'timestamp': '2025-09-30 23:10:16.924901', 'step': 5902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:16.978318', 'step': 5902, 'epoch': 3} {'type': 'loss', 'content': 0.00682800030335784, 'timestamp': '2025-09-30 23:10:16.980966', 'step': 5903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.035968', 'step': 5903, 'epoch': 3} {'type': 'loss', 'content': 0.0013540420914068818, 'timestamp': '2025-09-30 23:10:17.045244', 'step': 5904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:17.099481', 'step': 5904, 'epoch': 3} {'type': 'loss', 'content': 0.0018465594621375203, 'timestamp': '2025-09-30 23:10:17.102176', 'step': 5905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.156779', 'step': 5905, 'epoch': 3} {'type': 'loss', 'content': 0.000522125163115561, 'timestamp': '2025-09-30 23:10:17.159308', 'step': 5906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.214066', 'step': 5906, 'epoch': 3} {'type': 'loss', 'content': 0.014426366426050663, 'timestamp': '2025-09-30 23:10:17.216649', 'step': 5907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.270728', 'step': 5907, 'epoch': 3} {'type': 'loss', 'content': 0.0009599601035006344, 'timestamp': '2025-09-30 23:10:17.276825', 'step': 5908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.330104', 'step': 5908, 'epoch': 3} {'type': 'loss', 'content': 0.003704459173604846, 'timestamp': '2025-09-30 23:10:17.332525', 'step': 5909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:17.385112', 'step': 5909, 'epoch': 3} {'type': 'loss', 'content': 0.0019634647760540247, 'timestamp': '2025-09-30 23:10:17.387525', 'step': 5910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.440167', 'step': 5910, 'epoch': 3} {'type': 'loss', 'content': 0.0020689174998551607, 'timestamp': '2025-09-30 23:10:17.442384', 'step': 5911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.494890', 'step': 5911, 'epoch': 3} {'type': 'loss', 'content': 0.003178196493536234, 'timestamp': '2025-09-30 23:10:17.501380', 'step': 5912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.553575', 'step': 5912, 'epoch': 3} {'type': 'loss', 'content': 0.010347162373363972, 'timestamp': '2025-09-30 23:10:17.555835', 'step': 5913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:17.608639', 'step': 5913, 'epoch': 3} {'type': 'loss', 'content': 0.015956932678818703, 'timestamp': '2025-09-30 23:10:17.610855', 'step': 5914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.664260', 'step': 5914, 'epoch': 3} {'type': 'loss', 'content': 0.0031975156161934137, 'timestamp': '2025-09-30 23:10:17.668312', 'step': 5915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.724254', 'step': 5915, 'epoch': 3} {'type': 'loss', 'content': 0.00732384342700243, 'timestamp': '2025-09-30 23:10:17.731378', 'step': 5916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.787462', 'step': 5916, 'epoch': 3} {'type': 'loss', 'content': 0.00014491014007944614, 'timestamp': '2025-09-30 23:10:17.789827', 'step': 5917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.843057', 'step': 5917, 'epoch': 3} {'type': 'loss', 'content': 0.0010386533103883266, 'timestamp': '2025-09-30 23:10:17.845444', 'step': 5918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.898053', 'step': 5918, 'epoch': 3} {'type': 'loss', 'content': 0.002466093748807907, 'timestamp': '2025-09-30 23:10:17.902136', 'step': 5919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:17.955021', 'step': 5919, 'epoch': 3} {'type': 'loss', 'content': 0.0010258227121084929, 'timestamp': '2025-09-30 23:10:17.960843', 'step': 5920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:18.012989', 'step': 5920, 'epoch': 3} {'type': 'loss', 'content': 0.008649867959320545, 'timestamp': '2025-09-30 23:10:18.016186', 'step': 5921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:18.068616', 'step': 5921, 'epoch': 3} {'type': 'loss', 'content': 0.004044689238071442, 'timestamp': '2025-09-30 23:10:18.072219', 'step': 5922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:18.126811', 'step': 5922, 'epoch': 3} {'type': 'loss', 'content': 0.013115644454956055, 'timestamp': '2025-09-30 23:10:18.129221', 'step': 5923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:18.182698', 'step': 5923, 'epoch': 3} {'type': 'loss', 'content': 0.0001646142773097381, 'timestamp': '2025-09-30 23:10:18.188493', 'step': 5924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:18.241209', 'step': 5924, 'epoch': 3} {'type': 'loss', 'content': 0.0006281808600760996, 'timestamp': '2025-09-30 23:10:18.245399', 'step': 5925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:18.303430', 'step': 5925, 'epoch': 3} {'type': 'loss', 'content': 0.0077399299480021, 'timestamp': '2025-09-30 23:10:18.308152', 'step': 5926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:18.365650', 'step': 5926, 'epoch': 3} {'type': 'loss', 'content': 0.041938673704862595, 'timestamp': '2025-09-30 23:10:18.370916', 'step': 5927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:18.428601', 'step': 5927, 'epoch': 3} {'type': 'loss', 'content': 0.00032888370333239436, 'timestamp': '2025-09-30 23:10:18.434604', 'step': 5928, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:10:21.900209', 'step': 5928, 'epoch': 3} {'type': 'pplx', 'content': 8286817.018538356, 'timestamp': '2025-09-30 23:10:21.903028', 'step': 5928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:21.954508', 'step': 5928, 'epoch': 3} {'type': 'loss', 'content': 0.00035724256304092705, 'timestamp': '2025-09-30 23:10:21.957207', 'step': 5929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.012049', 'step': 5929, 'epoch': 3} {'type': 'loss', 'content': 0.0012593872379511595, 'timestamp': '2025-09-30 23:10:22.014282', 'step': 5930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.068311', 'step': 5930, 'epoch': 3} {'type': 'loss', 'content': 0.0027955633122473955, 'timestamp': '2025-09-30 23:10:22.071609', 'step': 5931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.127620', 'step': 5931, 'epoch': 3} {'type': 'loss', 'content': 0.0004946928238496184, 'timestamp': '2025-09-30 23:10:22.133731', 'step': 5932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.187992', 'step': 5932, 'epoch': 3} {'type': 'loss', 'content': 0.00027488000341691077, 'timestamp': '2025-09-30 23:10:22.190622', 'step': 5933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.244031', 'step': 5933, 'epoch': 3} {'type': 'loss', 'content': 0.0009675441542640328, 'timestamp': '2025-09-30 23:10:22.248797', 'step': 5934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.304813', 'step': 5934, 'epoch': 3} {'type': 'loss', 'content': 0.04839849844574928, 'timestamp': '2025-09-30 23:10:22.310411', 'step': 5935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.366752', 'step': 5935, 'epoch': 3} {'type': 'loss', 'content': 0.0005182224558666348, 'timestamp': '2025-09-30 23:10:22.373525', 'step': 5936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.429377', 'step': 5936, 'epoch': 3} {'type': 'loss', 'content': 0.0006342340493574739, 'timestamp': '2025-09-30 23:10:22.432487', 'step': 5937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.488072', 'step': 5937, 'epoch': 3} {'type': 'loss', 'content': 0.005425011273473501, 'timestamp': '2025-09-30 23:10:22.490480', 'step': 5938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.548868', 'step': 5938, 'epoch': 3} {'type': 'loss', 'content': 0.0005130675272084773, 'timestamp': '2025-09-30 23:10:22.552707', 'step': 5939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.608816', 'step': 5939, 'epoch': 3} {'type': 'loss', 'content': 0.000509886653162539, 'timestamp': '2025-09-30 23:10:22.615115', 'step': 5940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.668745', 'step': 5940, 'epoch': 3} {'type': 'loss', 'content': 0.0001564677368151024, 'timestamp': '2025-09-30 23:10:22.672176', 'step': 5941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.726598', 'step': 5941, 'epoch': 3} {'type': 'loss', 'content': 7.405470387311652e-05, 'timestamp': '2025-09-30 23:10:22.732209', 'step': 5942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:22.791613', 'step': 5942, 'epoch': 3} {'type': 'loss', 'content': 0.0002950316702481359, 'timestamp': '2025-09-30 23:10:22.796628', 'step': 5943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.856242', 'step': 5943, 'epoch': 3} {'type': 'loss', 'content': 0.00031609932193532586, 'timestamp': '2025-09-30 23:10:22.863575', 'step': 5944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.921870', 'step': 5944, 'epoch': 3} {'type': 'loss', 'content': 0.0001723530876915902, 'timestamp': '2025-09-30 23:10:22.926016', 'step': 5945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:22.982662', 'step': 5945, 'epoch': 3} {'type': 'loss', 'content': 0.03738594055175781, 'timestamp': '2025-09-30 23:10:22.986306', 'step': 5946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.041052', 'step': 5946, 'epoch': 3} {'type': 'loss', 'content': 8.258706657215953e-05, 'timestamp': '2025-09-30 23:10:23.044535', 'step': 5947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.099090', 'step': 5947, 'epoch': 3} {'type': 'loss', 'content': 0.06329602748155594, 'timestamp': '2025-09-30 23:10:23.105767', 'step': 5948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.160999', 'step': 5948, 'epoch': 3} {'type': 'loss', 'content': 0.00014083208225201815, 'timestamp': '2025-09-30 23:10:23.164683', 'step': 5949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.220146', 'step': 5949, 'epoch': 3} {'type': 'loss', 'content': 0.00023325730580836535, 'timestamp': '2025-09-30 23:10:23.223523', 'step': 5950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:23.279645', 'step': 5950, 'epoch': 3} {'type': 'loss', 'content': 0.0021284474059939384, 'timestamp': '2025-09-30 23:10:23.282829', 'step': 5951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.340322', 'step': 5951, 'epoch': 3} {'type': 'loss', 'content': 0.04009539261460304, 'timestamp': '2025-09-30 23:10:23.347318', 'step': 5952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.406005', 'step': 5952, 'epoch': 3} {'type': 'loss', 'content': 0.0005776105681434274, 'timestamp': '2025-09-30 23:10:23.409277', 'step': 5953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.468335', 'step': 5953, 'epoch': 3} {'type': 'loss', 'content': 0.0012245874386280775, 'timestamp': '2025-09-30 23:10:23.471534', 'step': 5954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.529030', 'step': 5954, 'epoch': 3} {'type': 'loss', 'content': 0.00026702924515120685, 'timestamp': '2025-09-30 23:10:23.532097', 'step': 5955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.591821', 'step': 5955, 'epoch': 3} {'type': 'loss', 'content': 0.0001465149543946609, 'timestamp': '2025-09-30 23:10:23.600393', 'step': 5956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.655437', 'step': 5956, 'epoch': 3} {'type': 'loss', 'content': 0.027659254148602486, 'timestamp': '2025-09-30 23:10:23.662890', 'step': 5957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:23.724305', 'step': 5957, 'epoch': 3} {'type': 'loss', 'content': 0.00019167362188454717, 'timestamp': '2025-09-30 23:10:23.729743', 'step': 5958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.797993', 'step': 5958, 'epoch': 3} {'type': 'loss', 'content': 0.004012221470475197, 'timestamp': '2025-09-30 23:10:23.802936', 'step': 5959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.865474', 'step': 5959, 'epoch': 3} {'type': 'loss', 'content': 0.002997406991198659, 'timestamp': '2025-09-30 23:10:23.873640', 'step': 5960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.936018', 'step': 5960, 'epoch': 3} {'type': 'loss', 'content': 9.392393985763192e-05, 'timestamp': '2025-09-30 23:10:23.939339', 'step': 5961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:23.995436', 'step': 5961, 'epoch': 3} {'type': 'loss', 'content': 0.0004922862281091511, 'timestamp': '2025-09-30 23:10:24.001443', 'step': 5962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:24.059985', 'step': 5962, 'epoch': 3} {'type': 'loss', 'content': 0.0002366089029237628, 'timestamp': '2025-09-30 23:10:24.063107', 'step': 5963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.119416', 'step': 5963, 'epoch': 3} {'type': 'loss', 'content': 0.0011294384021311998, 'timestamp': '2025-09-30 23:10:24.126826', 'step': 5964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.182574', 'step': 5964, 'epoch': 3} {'type': 'loss', 'content': 0.0003945974458474666, 'timestamp': '2025-09-30 23:10:24.188651', 'step': 5965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.246074', 'step': 5965, 'epoch': 3} {'type': 'loss', 'content': 0.006092487368732691, 'timestamp': '2025-09-30 23:10:24.249604', 'step': 5966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.309295', 'step': 5966, 'epoch': 3} {'type': 'loss', 'content': 0.035365890711545944, 'timestamp': '2025-09-30 23:10:24.313765', 'step': 5967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.378299', 'step': 5967, 'epoch': 3} {'type': 'loss', 'content': 0.005437840707600117, 'timestamp': '2025-09-30 23:10:24.385139', 'step': 5968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.443319', 'step': 5968, 'epoch': 3} {'type': 'loss', 'content': 0.00015397775860037655, 'timestamp': '2025-09-30 23:10:24.448677', 'step': 5969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:24.508716', 'step': 5969, 'epoch': 3} {'type': 'loss', 'content': 0.0012533953413367271, 'timestamp': '2025-09-30 23:10:24.513604', 'step': 5970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.573506', 'step': 5970, 'epoch': 3} {'type': 'loss', 'content': 0.0002541947178542614, 'timestamp': '2025-09-30 23:10:24.576338', 'step': 5971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.633426', 'step': 5971, 'epoch': 3} {'type': 'loss', 'content': 8.746279490878806e-05, 'timestamp': '2025-09-30 23:10:24.640946', 'step': 5972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.695635', 'step': 5972, 'epoch': 3} {'type': 'loss', 'content': 0.015170035883784294, 'timestamp': '2025-09-30 23:10:24.700226', 'step': 5973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.756426', 'step': 5973, 'epoch': 3} {'type': 'loss', 'content': 0.0010824274504557252, 'timestamp': '2025-09-30 23:10:24.762827', 'step': 5974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:24.824783', 'step': 5974, 'epoch': 3} {'type': 'loss', 'content': 0.0011442947434261441, 'timestamp': '2025-09-30 23:10:24.829077', 'step': 5975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:24.886363', 'step': 5975, 'epoch': 3} {'type': 'loss', 'content': 0.0031795515678822994, 'timestamp': '2025-09-30 23:10:24.894742', 'step': 5976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:24.953372', 'step': 5976, 'epoch': 3} {'type': 'loss', 'content': 0.0005101829883642495, 'timestamp': '2025-09-30 23:10:24.956855', 'step': 5977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:25.016700', 'step': 5977, 'epoch': 3} {'type': 'loss', 'content': 0.008322707377374172, 'timestamp': '2025-09-30 23:10:25.019393', 'step': 5978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.079741', 'step': 5978, 'epoch': 3} {'type': 'loss', 'content': 0.0012470587389543653, 'timestamp': '2025-09-30 23:10:25.083825', 'step': 5979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:25.144296', 'step': 5979, 'epoch': 3} {'type': 'loss', 'content': 0.0001453622680855915, 'timestamp': '2025-09-30 23:10:25.151552', 'step': 5980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.210391', 'step': 5980, 'epoch': 3} {'type': 'loss', 'content': 0.0010744595201686025, 'timestamp': '2025-09-30 23:10:25.215428', 'step': 5981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.273511', 'step': 5981, 'epoch': 3} {'type': 'loss', 'content': 9.631300054024905e-05, 'timestamp': '2025-09-30 23:10:25.277186', 'step': 5982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.336372', 'step': 5982, 'epoch': 3} {'type': 'loss', 'content': 4.4949301809538156e-05, 'timestamp': '2025-09-30 23:10:25.341507', 'step': 5983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.397174', 'step': 5983, 'epoch': 3} {'type': 'loss', 'content': 0.018741954118013382, 'timestamp': '2025-09-30 23:10:25.405095', 'step': 5984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.462636', 'step': 5984, 'epoch': 3} {'type': 'loss', 'content': 0.0011502528795972466, 'timestamp': '2025-09-30 23:10:25.466629', 'step': 5985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.536173', 'step': 5985, 'epoch': 3} {'type': 'loss', 'content': 0.0014714867575094104, 'timestamp': '2025-09-30 23:10:25.540205', 'step': 5986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.607388', 'step': 5986, 'epoch': 3} {'type': 'loss', 'content': 0.0029442221857607365, 'timestamp': '2025-09-30 23:10:25.610750', 'step': 5987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.669289', 'step': 5987, 'epoch': 3} {'type': 'loss', 'content': 0.00391211686655879, 'timestamp': '2025-09-30 23:10:25.675182', 'step': 5988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.728422', 'step': 5988, 'epoch': 3} {'type': 'loss', 'content': 0.0007392655243165791, 'timestamp': '2025-09-30 23:10:25.731982', 'step': 5989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:25.788478', 'step': 5989, 'epoch': 3} {'type': 'loss', 'content': 0.0007470081909559667, 'timestamp': '2025-09-30 23:10:25.792850', 'step': 5990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:25.853210', 'step': 5990, 'epoch': 3} {'type': 'loss', 'content': 0.022102663293480873, 'timestamp': '2025-09-30 23:10:25.856264', 'step': 5991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:25.911828', 'step': 5991, 'epoch': 3} {'type': 'loss', 'content': 0.012739666737616062, 'timestamp': '2025-09-30 23:10:25.918028', 'step': 5992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:25.971387', 'step': 5992, 'epoch': 3} {'type': 'loss', 'content': 0.0013721513096243143, 'timestamp': '2025-09-30 23:10:25.974350', 'step': 5993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.030440', 'step': 5993, 'epoch': 3} {'type': 'loss', 'content': 6.422742444556206e-05, 'timestamp': '2025-09-30 23:10:26.036144', 'step': 5994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.098060', 'step': 5994, 'epoch': 3} {'type': 'loss', 'content': 0.0006605169037356973, 'timestamp': '2025-09-30 23:10:26.100938', 'step': 5995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.161967', 'step': 5995, 'epoch': 3} {'type': 'loss', 'content': 0.0015257077757269144, 'timestamp': '2025-09-30 23:10:26.168535', 'step': 5996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:26.222202', 'step': 5996, 'epoch': 3} {'type': 'loss', 'content': 0.0016060899943113327, 'timestamp': '2025-09-30 23:10:26.225446', 'step': 5997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.280755', 'step': 5997, 'epoch': 3} {'type': 'loss', 'content': 0.0032480282243341208, 'timestamp': '2025-09-30 23:10:26.285054', 'step': 5998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.344084', 'step': 5998, 'epoch': 3} {'type': 'loss', 'content': 0.00012535127461887896, 'timestamp': '2025-09-30 23:10:26.347599', 'step': 5999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.404879', 'step': 5999, 'epoch': 3} {'type': 'loss', 'content': 0.01455560140311718, 'timestamp': '2025-09-30 23:10:26.410889', 'step': 6000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-30 23:10:26.815118', 'step': 6000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.867804', 'step': 6000, 'epoch': 3} {'type': 'loss', 'content': 0.004509835038334131, 'timestamp': '2025-09-30 23:10:26.870324', 'step': 6001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:26.924645', 'step': 6001, 'epoch': 3} {'type': 'loss', 'content': 0.0004100250662304461, 'timestamp': '2025-09-30 23:10:26.927632', 'step': 6002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:26.984164', 'step': 6002, 'epoch': 3} {'type': 'loss', 'content': 0.0002813061873894185, 'timestamp': '2025-09-30 23:10:26.986603', 'step': 6003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.041003', 'step': 6003, 'epoch': 3} {'type': 'loss', 'content': 0.011141886003315449, 'timestamp': '2025-09-30 23:10:27.049669', 'step': 6004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.109106', 'step': 6004, 'epoch': 3} {'type': 'loss', 'content': 0.00031936209416016936, 'timestamp': '2025-09-30 23:10:27.114336', 'step': 6005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.175628', 'step': 6005, 'epoch': 3} {'type': 'loss', 'content': 0.00013752291852142662, 'timestamp': '2025-09-30 23:10:27.179505', 'step': 6006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.236838', 'step': 6006, 'epoch': 3} {'type': 'loss', 'content': 0.0002226307988166809, 'timestamp': '2025-09-30 23:10:27.239656', 'step': 6007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.294414', 'step': 6007, 'epoch': 3} {'type': 'loss', 'content': 0.00045458233216777444, 'timestamp': '2025-09-30 23:10:27.300172', 'step': 6008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:27.353637', 'step': 6008, 'epoch': 3} {'type': 'loss', 'content': 0.0007995412452146411, 'timestamp': '2025-09-30 23:10:27.356085', 'step': 6009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.410591', 'step': 6009, 'epoch': 3} {'type': 'loss', 'content': 0.005544024053961039, 'timestamp': '2025-09-30 23:10:27.413558', 'step': 6010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.471637', 'step': 6010, 'epoch': 3} {'type': 'loss', 'content': 0.0020277455914765596, 'timestamp': '2025-09-30 23:10:27.475080', 'step': 6011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.536307', 'step': 6011, 'epoch': 3} {'type': 'loss', 'content': 0.0019447628874331713, 'timestamp': '2025-09-30 23:10:27.544789', 'step': 6012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.600699', 'step': 6012, 'epoch': 3} {'type': 'loss', 'content': 0.005257063079625368, 'timestamp': '2025-09-30 23:10:27.603639', 'step': 6013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.658809', 'step': 6013, 'epoch': 3} {'type': 'loss', 'content': 0.013446548953652382, 'timestamp': '2025-09-30 23:10:27.661603', 'step': 6014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.715362', 'step': 6014, 'epoch': 3} {'type': 'loss', 'content': 0.0006646720576100051, 'timestamp': '2025-09-30 23:10:27.719740', 'step': 6015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.787845', 'step': 6015, 'epoch': 3} {'type': 'loss', 'content': 0.00021614876459352672, 'timestamp': '2025-09-30 23:10:27.794429', 'step': 6016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.850583', 'step': 6016, 'epoch': 3} {'type': 'loss', 'content': 0.00022121610527392477, 'timestamp': '2025-09-30 23:10:27.854405', 'step': 6017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.913466', 'step': 6017, 'epoch': 3} {'type': 'loss', 'content': 0.00018028906197287142, 'timestamp': '2025-09-30 23:10:27.915843', 'step': 6018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:27.968358', 'step': 6018, 'epoch': 3} {'type': 'loss', 'content': 0.004912437405437231, 'timestamp': '2025-09-30 23:10:27.970715', 'step': 6019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.023771', 'step': 6019, 'epoch': 3} {'type': 'loss', 'content': 0.0009681982919573784, 'timestamp': '2025-09-30 23:10:28.030034', 'step': 6020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.083725', 'step': 6020, 'epoch': 3} {'type': 'loss', 'content': 0.0006129147368483245, 'timestamp': '2025-09-30 23:10:28.088700', 'step': 6021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.145502', 'step': 6021, 'epoch': 3} {'type': 'loss', 'content': 0.00028044317150488496, 'timestamp': '2025-09-30 23:10:28.148290', 'step': 6022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.212559', 'step': 6022, 'epoch': 3} {'type': 'loss', 'content': 0.0001447657123208046, 'timestamp': '2025-09-30 23:10:28.216771', 'step': 6023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.273905', 'step': 6023, 'epoch': 3} {'type': 'loss', 'content': 0.0005360879586078227, 'timestamp': '2025-09-30 23:10:28.284745', 'step': 6024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.348253', 'step': 6024, 'epoch': 3} {'type': 'loss', 'content': 0.0006215216708369553, 'timestamp': '2025-09-30 23:10:28.351243', 'step': 6025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.407577', 'step': 6025, 'epoch': 3} {'type': 'loss', 'content': 0.0027759114746004343, 'timestamp': '2025-09-30 23:10:28.412122', 'step': 6026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.468038', 'step': 6026, 'epoch': 3} {'type': 'loss', 'content': 0.014327417127788067, 'timestamp': '2025-09-30 23:10:28.470520', 'step': 6027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.524791', 'step': 6027, 'epoch': 3} {'type': 'loss', 'content': 0.00027515104738995433, 'timestamp': '2025-09-30 23:10:28.530773', 'step': 6028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.586908', 'step': 6028, 'epoch': 3} {'type': 'loss', 'content': 0.0005223188200034201, 'timestamp': '2025-09-30 23:10:28.592281', 'step': 6029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.646769', 'step': 6029, 'epoch': 3} {'type': 'loss', 'content': 0.034695353358983994, 'timestamp': '2025-09-30 23:10:28.649093', 'step': 6030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:28.701613', 'step': 6030, 'epoch': 3} {'type': 'loss', 'content': 0.0007443369249813259, 'timestamp': '2025-09-30 23:10:28.707048', 'step': 6031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:28.776215', 'step': 6031, 'epoch': 3} {'type': 'loss', 'content': 0.00027900654822587967, 'timestamp': '2025-09-30 23:10:28.786975', 'step': 6032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.843882', 'step': 6032, 'epoch': 3} {'type': 'loss', 'content': 0.05482589825987816, 'timestamp': '2025-09-30 23:10:28.847293', 'step': 6033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.903367', 'step': 6033, 'epoch': 3} {'type': 'loss', 'content': 4.2602823668858036e-05, 'timestamp': '2025-09-30 23:10:28.906305', 'step': 6034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:28.965266', 'step': 6034, 'epoch': 3} {'type': 'loss', 'content': 0.0010865943040698767, 'timestamp': '2025-09-30 23:10:28.968294', 'step': 6035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.023362', 'step': 6035, 'epoch': 3} {'type': 'loss', 'content': 0.00014402683882508427, 'timestamp': '2025-09-30 23:10:29.030499', 'step': 6036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:29.088890', 'step': 6036, 'epoch': 3} {'type': 'loss', 'content': 0.02590782381594181, 'timestamp': '2025-09-30 23:10:29.096090', 'step': 6037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.154701', 'step': 6037, 'epoch': 3} {'type': 'loss', 'content': 0.0008708466775715351, 'timestamp': '2025-09-30 23:10:29.157510', 'step': 6038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:29.212902', 'step': 6038, 'epoch': 3} {'type': 'loss', 'content': 0.0010904736118391156, 'timestamp': '2025-09-30 23:10:29.215605', 'step': 6039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.271114', 'step': 6039, 'epoch': 3} {'type': 'loss', 'content': 0.0006493793916888535, 'timestamp': '2025-09-30 23:10:29.277388', 'step': 6040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:29.333259', 'step': 6040, 'epoch': 3} {'type': 'loss', 'content': 0.0010807280195876956, 'timestamp': '2025-09-30 23:10:29.335570', 'step': 6041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.389627', 'step': 6041, 'epoch': 3} {'type': 'loss', 'content': 3.07649934256915e-05, 'timestamp': '2025-09-30 23:10:29.392314', 'step': 6042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.445598', 'step': 6042, 'epoch': 3} {'type': 'loss', 'content': 0.0020963489077985287, 'timestamp': '2025-09-30 23:10:29.449522', 'step': 6043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.505904', 'step': 6043, 'epoch': 3} {'type': 'loss', 'content': 0.0004185824072919786, 'timestamp': '2025-09-30 23:10:29.512273', 'step': 6044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.566871', 'step': 6044, 'epoch': 3} {'type': 'loss', 'content': 0.00013696073438040912, 'timestamp': '2025-09-30 23:10:29.569416', 'step': 6045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.622933', 'step': 6045, 'epoch': 3} {'type': 'loss', 'content': 0.0001501461665611714, 'timestamp': '2025-09-30 23:10:29.626330', 'step': 6046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.679533', 'step': 6046, 'epoch': 3} {'type': 'loss', 'content': 0.00019782067101914436, 'timestamp': '2025-09-30 23:10:29.686917', 'step': 6047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.756250', 'step': 6047, 'epoch': 3} {'type': 'loss', 'content': 0.0006049758521839976, 'timestamp': '2025-09-30 23:10:29.762852', 'step': 6048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.816173', 'step': 6048, 'epoch': 3} {'type': 'loss', 'content': 0.00720446091145277, 'timestamp': '2025-09-30 23:10:29.818779', 'step': 6049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:29.873521', 'step': 6049, 'epoch': 3} {'type': 'loss', 'content': 0.0011112262727692723, 'timestamp': '2025-09-30 23:10:29.876070', 'step': 6050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:10:29.929529', 'step': 6050, 'epoch': 3} {'type': 'loss', 'content': 0.0006611553253605962, 'timestamp': '2025-09-30 23:10:29.932101', 'step': 6051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:29.985740', 'step': 6051, 'epoch': 3} {'type': 'loss', 'content': 0.0006897070561535656, 'timestamp': '2025-09-30 23:10:29.991640', 'step': 6052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.044912', 'step': 6052, 'epoch': 3} {'type': 'loss', 'content': 0.00030282497755251825, 'timestamp': '2025-09-30 23:10:30.047093', 'step': 6053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.101011', 'step': 6053, 'epoch': 3} {'type': 'loss', 'content': 0.006974535994231701, 'timestamp': '2025-09-30 23:10:30.103361', 'step': 6054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.157783', 'step': 6054, 'epoch': 3} {'type': 'loss', 'content': 0.0012634808663278818, 'timestamp': '2025-09-30 23:10:30.160338', 'step': 6055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.213345', 'step': 6055, 'epoch': 3} {'type': 'loss', 'content': 0.05977770686149597, 'timestamp': '2025-09-30 23:10:30.219656', 'step': 6056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.272027', 'step': 6056, 'epoch': 3} {'type': 'loss', 'content': 0.052625324577093124, 'timestamp': '2025-09-30 23:10:30.274650', 'step': 6057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.327273', 'step': 6057, 'epoch': 3} {'type': 'loss', 'content': 0.0006779053946956992, 'timestamp': '2025-09-30 23:10:30.329696', 'step': 6058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.384044', 'step': 6058, 'epoch': 3} {'type': 'loss', 'content': 0.0001708731724647805, 'timestamp': '2025-09-30 23:10:30.386462', 'step': 6059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.439213', 'step': 6059, 'epoch': 3} {'type': 'loss', 'content': 0.0024439399130642414, 'timestamp': '2025-09-30 23:10:30.445578', 'step': 6060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.500946', 'step': 6060, 'epoch': 3} {'type': 'loss', 'content': 0.018181560561060905, 'timestamp': '2025-09-30 23:10:30.503419', 'step': 6061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.556987', 'step': 6061, 'epoch': 3} {'type': 'loss', 'content': 0.0002406299754511565, 'timestamp': '2025-09-30 23:10:30.559861', 'step': 6062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.613149', 'step': 6062, 'epoch': 3} {'type': 'loss', 'content': 0.0188923142850399, 'timestamp': '2025-09-30 23:10:30.615635', 'step': 6063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.670231', 'step': 6063, 'epoch': 3} {'type': 'loss', 'content': 0.0003814003721345216, 'timestamp': '2025-09-30 23:10:30.676655', 'step': 6064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.730615', 'step': 6064, 'epoch': 3} {'type': 'loss', 'content': 0.011620447039604187, 'timestamp': '2025-09-30 23:10:30.732903', 'step': 6065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:30.787127', 'step': 6065, 'epoch': 3} {'type': 'loss', 'content': 0.026694459840655327, 'timestamp': '2025-09-30 23:10:30.789933', 'step': 6066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:30.844474', 'step': 6066, 'epoch': 3} {'type': 'loss', 'content': 0.0014998631086200476, 'timestamp': '2025-09-30 23:10:30.847325', 'step': 6067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.900703', 'step': 6067, 'epoch': 3} {'type': 'loss', 'content': 0.052935872226953506, 'timestamp': '2025-09-30 23:10:30.906932', 'step': 6068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:30.960087', 'step': 6068, 'epoch': 3} {'type': 'loss', 'content': 0.008216776885092258, 'timestamp': '2025-09-30 23:10:30.963048', 'step': 6069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:31.016494', 'step': 6069, 'epoch': 3} {'type': 'loss', 'content': 0.00047862224164418876, 'timestamp': '2025-09-30 23:10:31.020710', 'step': 6070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.077558', 'step': 6070, 'epoch': 3} {'type': 'loss', 'content': 7.913145236670971e-05, 'timestamp': '2025-09-30 23:10:31.080620', 'step': 6071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.135651', 'step': 6071, 'epoch': 3} {'type': 'loss', 'content': 0.0006859530112706125, 'timestamp': '2025-09-30 23:10:31.142127', 'step': 6072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.199496', 'step': 6072, 'epoch': 3} {'type': 'loss', 'content': 0.022298991680145264, 'timestamp': '2025-09-30 23:10:31.204845', 'step': 6073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.261633', 'step': 6073, 'epoch': 3} {'type': 'loss', 'content': 0.001432115794159472, 'timestamp': '2025-09-30 23:10:31.264596', 'step': 6074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.319853', 'step': 6074, 'epoch': 3} {'type': 'loss', 'content': 0.0016321366420015693, 'timestamp': '2025-09-30 23:10:31.322775', 'step': 6075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.377774', 'step': 6075, 'epoch': 3} {'type': 'loss', 'content': 0.005310562904924154, 'timestamp': '2025-09-30 23:10:31.384342', 'step': 6076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.438898', 'step': 6076, 'epoch': 3} {'type': 'loss', 'content': 0.0026408336125314236, 'timestamp': '2025-09-30 23:10:31.442864', 'step': 6077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.500831', 'step': 6077, 'epoch': 3} {'type': 'loss', 'content': 0.001690611825324595, 'timestamp': '2025-09-30 23:10:31.503832', 'step': 6078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:31.559232', 'step': 6078, 'epoch': 3} {'type': 'loss', 'content': 0.00015798376989550889, 'timestamp': '2025-09-30 23:10:31.563388', 'step': 6079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:31.621629', 'step': 6079, 'epoch': 3} {'type': 'loss', 'content': 0.000874347344506532, 'timestamp': '2025-09-30 23:10:31.628092', 'step': 6080, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:10:35.107944', 'step': 6080, 'epoch': 3} {'type': 'pplx', 'content': 7744518.141400004, 'timestamp': '2025-09-30 23:10:35.111144', 'step': 6080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.162766', 'step': 6080, 'epoch': 3} {'type': 'loss', 'content': 0.0001667732431087643, 'timestamp': '2025-09-30 23:10:35.165735', 'step': 6081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.220439', 'step': 6081, 'epoch': 3} {'type': 'loss', 'content': 0.0005629129009321332, 'timestamp': '2025-09-30 23:10:35.223108', 'step': 6082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.276888', 'step': 6082, 'epoch': 3} {'type': 'loss', 'content': 0.002156852511689067, 'timestamp': '2025-09-30 23:10:35.283128', 'step': 6083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.336797', 'step': 6083, 'epoch': 3} {'type': 'loss', 'content': 0.0017856723861768842, 'timestamp': '2025-09-30 23:10:35.342855', 'step': 6084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.394742', 'step': 6084, 'epoch': 3} {'type': 'loss', 'content': 0.0014268964296206832, 'timestamp': '2025-09-30 23:10:35.397148', 'step': 6085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.452534', 'step': 6085, 'epoch': 3} {'type': 'loss', 'content': 0.0006754628266207874, 'timestamp': '2025-09-30 23:10:35.454901', 'step': 6086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.507072', 'step': 6086, 'epoch': 3} {'type': 'loss', 'content': 0.0016587770078331232, 'timestamp': '2025-09-30 23:10:35.509742', 'step': 6087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.566881', 'step': 6087, 'epoch': 3} {'type': 'loss', 'content': 0.011213400401175022, 'timestamp': '2025-09-30 23:10:35.572675', 'step': 6088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.634265', 'step': 6088, 'epoch': 3} {'type': 'loss', 'content': 0.0015198008622974157, 'timestamp': '2025-09-30 23:10:35.636536', 'step': 6089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.689463', 'step': 6089, 'epoch': 3} {'type': 'loss', 'content': 0.03491882607340813, 'timestamp': '2025-09-30 23:10:35.691834', 'step': 6090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:35.744482', 'step': 6090, 'epoch': 3} {'type': 'loss', 'content': 0.003494510892778635, 'timestamp': '2025-09-30 23:10:35.747419', 'step': 6091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.801953', 'step': 6091, 'epoch': 3} {'type': 'loss', 'content': 0.0002873439807444811, 'timestamp': '2025-09-30 23:10:35.808853', 'step': 6092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.862179', 'step': 6092, 'epoch': 3} {'type': 'loss', 'content': 0.0027955619152635336, 'timestamp': '2025-09-30 23:10:35.865033', 'step': 6093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.917576', 'step': 6093, 'epoch': 3} {'type': 'loss', 'content': 0.0030502884183079004, 'timestamp': '2025-09-30 23:10:35.920157', 'step': 6094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:35.972891', 'step': 6094, 'epoch': 3} {'type': 'loss', 'content': 0.00025508046383038163, 'timestamp': '2025-09-30 23:10:35.975423', 'step': 6095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:36.027812', 'step': 6095, 'epoch': 3} {'type': 'loss', 'content': 0.010487301275134087, 'timestamp': '2025-09-30 23:10:36.033776', 'step': 6096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.086048', 'step': 6096, 'epoch': 3} {'type': 'loss', 'content': 0.0008273447165265679, 'timestamp': '2025-09-30 23:10:36.089001', 'step': 6097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.146095', 'step': 6097, 'epoch': 3} {'type': 'loss', 'content': 0.001078765606507659, 'timestamp': '2025-09-30 23:10:36.148990', 'step': 6098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.204012', 'step': 6098, 'epoch': 3} {'type': 'loss', 'content': 0.02368866465985775, 'timestamp': '2025-09-30 23:10:36.206585', 'step': 6099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.263999', 'step': 6099, 'epoch': 3} {'type': 'loss', 'content': 0.001716132159344852, 'timestamp': '2025-09-30 23:10:36.270768', 'step': 6100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.325887', 'step': 6100, 'epoch': 3} {'type': 'loss', 'content': 0.0074431817047297955, 'timestamp': '2025-09-30 23:10:36.328163', 'step': 6101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.382428', 'step': 6101, 'epoch': 3} {'type': 'loss', 'content': 0.0006271193851716816, 'timestamp': '2025-09-30 23:10:36.386314', 'step': 6102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.442024', 'step': 6102, 'epoch': 3} {'type': 'loss', 'content': 0.005522021092474461, 'timestamp': '2025-09-30 23:10:36.444822', 'step': 6103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.498589', 'step': 6103, 'epoch': 3} {'type': 'loss', 'content': 0.007628141436725855, 'timestamp': '2025-09-30 23:10:36.504879', 'step': 6104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:36.560911', 'step': 6104, 'epoch': 3} {'type': 'loss', 'content': 0.05212035030126572, 'timestamp': '2025-09-30 23:10:36.564214', 'step': 6105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.618771', 'step': 6105, 'epoch': 3} {'type': 'loss', 'content': 0.0008555194945074618, 'timestamp': '2025-09-30 23:10:36.624984', 'step': 6106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.680700', 'step': 6106, 'epoch': 3} {'type': 'loss', 'content': 0.00012416283425409347, 'timestamp': '2025-09-30 23:10:36.683536', 'step': 6107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.739381', 'step': 6107, 'epoch': 3} {'type': 'loss', 'content': 0.0017382630612701178, 'timestamp': '2025-09-30 23:10:36.746130', 'step': 6108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:36.800307', 'step': 6108, 'epoch': 3} {'type': 'loss', 'content': 0.001539674587547779, 'timestamp': '2025-09-30 23:10:36.803352', 'step': 6109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.860164', 'step': 6109, 'epoch': 3} {'type': 'loss', 'content': 0.010468309745192528, 'timestamp': '2025-09-30 23:10:36.862761', 'step': 6110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:36.916032', 'step': 6110, 'epoch': 3} {'type': 'loss', 'content': 0.0002082336286548525, 'timestamp': '2025-09-30 23:10:36.920326', 'step': 6111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:36.975271', 'step': 6111, 'epoch': 3} {'type': 'loss', 'content': 0.0015279166400432587, 'timestamp': '2025-09-30 23:10:36.981181', 'step': 6112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.033540', 'step': 6112, 'epoch': 3} {'type': 'loss', 'content': 0.010597271844744682, 'timestamp': '2025-09-30 23:10:37.036484', 'step': 6113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.089723', 'step': 6113, 'epoch': 3} {'type': 'loss', 'content': 0.043092601001262665, 'timestamp': '2025-09-30 23:10:37.092123', 'step': 6114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.145200', 'step': 6114, 'epoch': 3} {'type': 'loss', 'content': 0.03031967207789421, 'timestamp': '2025-09-30 23:10:37.148606', 'step': 6115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.204644', 'step': 6115, 'epoch': 3} {'type': 'loss', 'content': 0.0017864464316517115, 'timestamp': '2025-09-30 23:10:37.212484', 'step': 6116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.266791', 'step': 6116, 'epoch': 3} {'type': 'loss', 'content': 0.004665930289775133, 'timestamp': '2025-09-30 23:10:37.270275', 'step': 6117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.322987', 'step': 6117, 'epoch': 3} {'type': 'loss', 'content': 0.03573284670710564, 'timestamp': '2025-09-30 23:10:37.331199', 'step': 6118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:10:37.393241', 'step': 6118, 'epoch': 3} {'type': 'loss', 'content': 0.0036997415591031313, 'timestamp': '2025-09-30 23:10:37.395955', 'step': 6119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:37.450143', 'step': 6119, 'epoch': 3} {'type': 'loss', 'content': 0.0034331236965954304, 'timestamp': '2025-09-30 23:10:37.457163', 'step': 6120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:37.510400', 'step': 6120, 'epoch': 3} {'type': 'loss', 'content': 0.0013912953436374664, 'timestamp': '2025-09-30 23:10:37.512874', 'step': 6121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.567582', 'step': 6121, 'epoch': 3} {'type': 'loss', 'content': 0.003301881719380617, 'timestamp': '2025-09-30 23:10:37.570536', 'step': 6122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.626284', 'step': 6122, 'epoch': 3} {'type': 'loss', 'content': 0.004548928700387478, 'timestamp': '2025-09-30 23:10:37.629721', 'step': 6123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.686881', 'step': 6123, 'epoch': 3} {'type': 'loss', 'content': 0.04157004877924919, 'timestamp': '2025-09-30 23:10:37.694429', 'step': 6124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.748420', 'step': 6124, 'epoch': 3} {'type': 'loss', 'content': 0.0009946438949555159, 'timestamp': '2025-09-30 23:10:37.751065', 'step': 6125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.804882', 'step': 6125, 'epoch': 3} {'type': 'loss', 'content': 0.008195522241294384, 'timestamp': '2025-09-30 23:10:37.812047', 'step': 6126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.870084', 'step': 6126, 'epoch': 3} {'type': 'loss', 'content': 0.002539691748097539, 'timestamp': '2025-09-30 23:10:37.872829', 'step': 6127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.925402', 'step': 6127, 'epoch': 3} {'type': 'loss', 'content': 0.026695188134908676, 'timestamp': '2025-09-30 23:10:37.936709', 'step': 6128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:37.994112', 'step': 6128, 'epoch': 3} {'type': 'loss', 'content': 0.04791973903775215, 'timestamp': '2025-09-30 23:10:37.996416', 'step': 6129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.049494', 'step': 6129, 'epoch': 3} {'type': 'loss', 'content': 0.004805984441190958, 'timestamp': '2025-09-30 23:10:38.052342', 'step': 6130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.106419', 'step': 6130, 'epoch': 3} {'type': 'loss', 'content': 0.00040175742469727993, 'timestamp': '2025-09-30 23:10:38.109612', 'step': 6131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.166070', 'step': 6131, 'epoch': 3} {'type': 'loss', 'content': 0.003293469315394759, 'timestamp': '2025-09-30 23:10:38.175048', 'step': 6132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.229894', 'step': 6132, 'epoch': 3} {'type': 'loss', 'content': 0.0035249192733317614, 'timestamp': '2025-09-30 23:10:38.232443', 'step': 6133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:38.288898', 'step': 6133, 'epoch': 3} {'type': 'loss', 'content': 0.003525723237544298, 'timestamp': '2025-09-30 23:10:38.292703', 'step': 6134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.348898', 'step': 6134, 'epoch': 3} {'type': 'loss', 'content': 0.0037024947814643383, 'timestamp': '2025-09-30 23:10:38.353096', 'step': 6135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.410888', 'step': 6135, 'epoch': 3} {'type': 'loss', 'content': 0.011846224777400494, 'timestamp': '2025-09-30 23:10:38.418788', 'step': 6136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.476786', 'step': 6136, 'epoch': 3} {'type': 'loss', 'content': 0.011663620360195637, 'timestamp': '2025-09-30 23:10:38.480613', 'step': 6137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.536118', 'step': 6137, 'epoch': 3} {'type': 'loss', 'content': 0.04969537630677223, 'timestamp': '2025-09-30 23:10:38.540769', 'step': 6138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:38.597982', 'step': 6138, 'epoch': 3} {'type': 'loss', 'content': 0.002530515193939209, 'timestamp': '2025-09-30 23:10:38.602473', 'step': 6139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.661542', 'step': 6139, 'epoch': 3} {'type': 'loss', 'content': 0.001158969127573073, 'timestamp': '2025-09-30 23:10:38.668736', 'step': 6140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.726850', 'step': 6140, 'epoch': 3} {'type': 'loss', 'content': 0.0009190370328724384, 'timestamp': '2025-09-30 23:10:38.730250', 'step': 6141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.785176', 'step': 6141, 'epoch': 3} {'type': 'loss', 'content': 0.0044724117033183575, 'timestamp': '2025-09-30 23:10:38.788339', 'step': 6142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.844810', 'step': 6142, 'epoch': 3} {'type': 'loss', 'content': 0.0015284327091649175, 'timestamp': '2025-09-30 23:10:38.848404', 'step': 6143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.903212', 'step': 6143, 'epoch': 3} {'type': 'loss', 'content': 0.0013598980149254203, 'timestamp': '2025-09-30 23:10:38.910250', 'step': 6144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:38.963649', 'step': 6144, 'epoch': 3} {'type': 'loss', 'content': 0.0020011167507618666, 'timestamp': '2025-09-30 23:10:38.966841', 'step': 6145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.025794', 'step': 6145, 'epoch': 3} {'type': 'loss', 'content': 0.0026586130261421204, 'timestamp': '2025-09-30 23:10:39.028854', 'step': 6146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:39.085463', 'step': 6146, 'epoch': 3} {'type': 'loss', 'content': 0.0026074678171426058, 'timestamp': '2025-09-30 23:10:39.088513', 'step': 6147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.142241', 'step': 6147, 'epoch': 3} {'type': 'loss', 'content': 0.001777377794496715, 'timestamp': '2025-09-30 23:10:39.153692', 'step': 6148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.210262', 'step': 6148, 'epoch': 3} {'type': 'loss', 'content': 0.0026821789797395468, 'timestamp': '2025-09-30 23:10:39.213449', 'step': 6149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.272077', 'step': 6149, 'epoch': 3} {'type': 'loss', 'content': 0.0007063757511787117, 'timestamp': '2025-09-30 23:10:39.275201', 'step': 6150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.329827', 'step': 6150, 'epoch': 3} {'type': 'loss', 'content': 0.006616094149649143, 'timestamp': '2025-09-30 23:10:39.332923', 'step': 6151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.388210', 'step': 6151, 'epoch': 3} {'type': 'loss', 'content': 0.0011938760289922357, 'timestamp': '2025-09-30 23:10:39.394658', 'step': 6152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:10:39.449011', 'step': 6152, 'epoch': 3} {'type': 'loss', 'content': 0.0021523190662264824, 'timestamp': '2025-09-30 23:10:39.453894', 'step': 6153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.510991', 'step': 6153, 'epoch': 3} {'type': 'loss', 'content': 0.003226557280868292, 'timestamp': '2025-09-30 23:10:39.514325', 'step': 6154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:39.573414', 'step': 6154, 'epoch': 3} {'type': 'loss', 'content': 0.006363728549331427, 'timestamp': '2025-09-30 23:10:39.577605', 'step': 6155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:39.634271', 'step': 6155, 'epoch': 3} {'type': 'loss', 'content': 0.006163180340081453, 'timestamp': '2025-09-30 23:10:39.641457', 'step': 6156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.695285', 'step': 6156, 'epoch': 3} {'type': 'loss', 'content': 0.0004865788505412638, 'timestamp': '2025-09-30 23:10:39.699798', 'step': 6157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.758020', 'step': 6157, 'epoch': 3} {'type': 'loss', 'content': 0.0022458506282418966, 'timestamp': '2025-09-30 23:10:39.769251', 'step': 6158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.832459', 'step': 6158, 'epoch': 3} {'type': 'loss', 'content': 0.0011290365364402533, 'timestamp': '2025-09-30 23:10:39.836496', 'step': 6159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.906683', 'step': 6159, 'epoch': 3} {'type': 'loss', 'content': 0.0006361679988913238, 'timestamp': '2025-09-30 23:10:39.915568', 'step': 6160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:39.976330', 'step': 6160, 'epoch': 3} {'type': 'loss', 'content': 0.039030611515045166, 'timestamp': '2025-09-30 23:10:39.983195', 'step': 6161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:40.043152', 'step': 6161, 'epoch': 3} {'type': 'loss', 'content': 0.02532217837870121, 'timestamp': '2025-09-30 23:10:40.046606', 'step': 6162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:40.102428', 'step': 6162, 'epoch': 3} {'type': 'loss', 'content': 0.005391399376094341, 'timestamp': '2025-09-30 23:10:40.106259', 'step': 6163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.164218', 'step': 6163, 'epoch': 3} {'type': 'loss', 'content': 0.004465493839234114, 'timestamp': '2025-09-30 23:10:40.170899', 'step': 6164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.225015', 'step': 6164, 'epoch': 3} {'type': 'loss', 'content': 0.0001443744549760595, 'timestamp': '2025-09-30 23:10:40.228206', 'step': 6165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:40.285221', 'step': 6165, 'epoch': 3} {'type': 'loss', 'content': 0.022102871909737587, 'timestamp': '2025-09-30 23:10:40.289427', 'step': 6166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.343626', 'step': 6166, 'epoch': 3} {'type': 'loss', 'content': 0.006327638868242502, 'timestamp': '2025-09-30 23:10:40.347355', 'step': 6167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:40.402927', 'step': 6167, 'epoch': 3} {'type': 'loss', 'content': 0.0029187314212322235, 'timestamp': '2025-09-30 23:10:40.409681', 'step': 6168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.463469', 'step': 6168, 'epoch': 3} {'type': 'loss', 'content': 0.0072292909026145935, 'timestamp': '2025-09-30 23:10:40.468879', 'step': 6169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:40.525694', 'step': 6169, 'epoch': 3} {'type': 'loss', 'content': 0.0005583249730989337, 'timestamp': '2025-09-30 23:10:40.528824', 'step': 6170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.584003', 'step': 6170, 'epoch': 3} {'type': 'loss', 'content': 0.0003256552736274898, 'timestamp': '2025-09-30 23:10:40.592280', 'step': 6171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:40.649946', 'step': 6171, 'epoch': 3} {'type': 'loss', 'content': 0.009944583289325237, 'timestamp': '2025-09-30 23:10:40.662842', 'step': 6172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.719057', 'step': 6172, 'epoch': 3} {'type': 'loss', 'content': 0.002007441595196724, 'timestamp': '2025-09-30 23:10:40.721714', 'step': 6173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.780206', 'step': 6173, 'epoch': 3} {'type': 'loss', 'content': 0.00022464003995992243, 'timestamp': '2025-09-30 23:10:40.782819', 'step': 6174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.835992', 'step': 6174, 'epoch': 3} {'type': 'loss', 'content': 0.0005165779148228467, 'timestamp': '2025-09-30 23:10:40.840232', 'step': 6175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:40.893259', 'step': 6175, 'epoch': 3} {'type': 'loss', 'content': 0.00011230054951738566, 'timestamp': '2025-09-30 23:10:40.900325', 'step': 6176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:40.955774', 'step': 6176, 'epoch': 3} {'type': 'loss', 'content': 0.0015549707459285855, 'timestamp': '2025-09-30 23:10:40.958929', 'step': 6177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.012642', 'step': 6177, 'epoch': 3} {'type': 'loss', 'content': 0.00021928916976321489, 'timestamp': '2025-09-30 23:10:41.016258', 'step': 6178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.074128', 'step': 6178, 'epoch': 3} {'type': 'loss', 'content': 0.0006658621714450419, 'timestamp': '2025-09-30 23:10:41.077419', 'step': 6179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:41.132034', 'step': 6179, 'epoch': 3} {'type': 'loss', 'content': 0.0021904183086007833, 'timestamp': '2025-09-30 23:10:41.138751', 'step': 6180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:41.193479', 'step': 6180, 'epoch': 3} {'type': 'loss', 'content': 0.022053133696317673, 'timestamp': '2025-09-30 23:10:41.197085', 'step': 6181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.252830', 'step': 6181, 'epoch': 3} {'type': 'loss', 'content': 0.012288413010537624, 'timestamp': '2025-09-30 23:10:41.255822', 'step': 6182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.310776', 'step': 6182, 'epoch': 3} {'type': 'loss', 'content': 0.0024347417056560516, 'timestamp': '2025-09-30 23:10:41.315190', 'step': 6183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:41.371415', 'step': 6183, 'epoch': 3} {'type': 'loss', 'content': 0.00019760058785323054, 'timestamp': '2025-09-30 23:10:41.378128', 'step': 6184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:41.431947', 'step': 6184, 'epoch': 3} {'type': 'loss', 'content': 0.003138650208711624, 'timestamp': '2025-09-30 23:10:41.435162', 'step': 6185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.491238', 'step': 6185, 'epoch': 3} {'type': 'loss', 'content': 0.0005038061062805355, 'timestamp': '2025-09-30 23:10:41.494397', 'step': 6186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.548673', 'step': 6186, 'epoch': 3} {'type': 'loss', 'content': 0.0003762751875910908, 'timestamp': '2025-09-30 23:10:41.551293', 'step': 6187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:41.605438', 'step': 6187, 'epoch': 3} {'type': 'loss', 'content': 0.015753544867038727, 'timestamp': '2025-09-30 23:10:41.612598', 'step': 6188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.667882', 'step': 6188, 'epoch': 3} {'type': 'loss', 'content': 7.357623690040782e-05, 'timestamp': '2025-09-30 23:10:41.671884', 'step': 6189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.728677', 'step': 6189, 'epoch': 3} {'type': 'loss', 'content': 0.0007422869093716145, 'timestamp': '2025-09-30 23:10:41.731541', 'step': 6190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:41.786093', 'step': 6190, 'epoch': 3} {'type': 'loss', 'content': 0.00013605508138425648, 'timestamp': '2025-09-30 23:10:41.789018', 'step': 6191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.842720', 'step': 6191, 'epoch': 3} {'type': 'loss', 'content': 0.03452552482485771, 'timestamp': '2025-09-30 23:10:41.850579', 'step': 6192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.905310', 'step': 6192, 'epoch': 3} {'type': 'loss', 'content': 0.019617276266217232, 'timestamp': '2025-09-30 23:10:41.908654', 'step': 6193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:41.963169', 'step': 6193, 'epoch': 3} {'type': 'loss', 'content': 0.0012784033315256238, 'timestamp': '2025-09-30 23:10:41.966383', 'step': 6194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:42.021049', 'step': 6194, 'epoch': 3} {'type': 'loss', 'content': 0.05792684480547905, 'timestamp': '2025-09-30 23:10:42.023422', 'step': 6195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.082345', 'step': 6195, 'epoch': 3} {'type': 'loss', 'content': 0.00068598089274019, 'timestamp': '2025-09-30 23:10:42.088506', 'step': 6196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.141594', 'step': 6196, 'epoch': 3} {'type': 'loss', 'content': 0.0023151934146881104, 'timestamp': '2025-09-30 23:10:42.144156', 'step': 6197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.197242', 'step': 6197, 'epoch': 3} {'type': 'loss', 'content': 0.0005570785142481327, 'timestamp': '2025-09-30 23:10:42.200580', 'step': 6198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.255702', 'step': 6198, 'epoch': 3} {'type': 'loss', 'content': 0.0007398042944259942, 'timestamp': '2025-09-30 23:10:42.258254', 'step': 6199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:42.311979', 'step': 6199, 'epoch': 3} {'type': 'loss', 'content': 0.00011716986045939848, 'timestamp': '2025-09-30 23:10:42.318528', 'step': 6200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.370485', 'step': 6200, 'epoch': 3} {'type': 'loss', 'content': 0.0020101224072277546, 'timestamp': '2025-09-30 23:10:42.382303', 'step': 6201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.452635', 'step': 6201, 'epoch': 3} {'type': 'loss', 'content': 0.0008709885878488421, 'timestamp': '2025-09-30 23:10:42.463261', 'step': 6202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.541767', 'step': 6202, 'epoch': 3} {'type': 'loss', 'content': 0.00025499428738839924, 'timestamp': '2025-09-30 23:10:42.555296', 'step': 6203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.623682', 'step': 6203, 'epoch': 3} {'type': 'loss', 'content': 0.0005530674825422466, 'timestamp': '2025-09-30 23:10:42.635386', 'step': 6204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.701304', 'step': 6204, 'epoch': 3} {'type': 'loss', 'content': 0.011064812541007996, 'timestamp': '2025-09-30 23:10:42.704315', 'step': 6205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:42.771519', 'step': 6205, 'epoch': 3} {'type': 'loss', 'content': 0.001220627804286778, 'timestamp': '2025-09-30 23:10:42.780103', 'step': 6206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.849480', 'step': 6206, 'epoch': 3} {'type': 'loss', 'content': 0.0006734305061399937, 'timestamp': '2025-09-30 23:10:42.853794', 'step': 6207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:42.919667', 'step': 6207, 'epoch': 3} {'type': 'loss', 'content': 0.01612117514014244, 'timestamp': '2025-09-30 23:10:42.933064', 'step': 6208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.004442', 'step': 6208, 'epoch': 3} {'type': 'loss', 'content': 0.0007196534425020218, 'timestamp': '2025-09-30 23:10:43.013535', 'step': 6209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:43.089360', 'step': 6209, 'epoch': 3} {'type': 'loss', 'content': 0.0012910200748592615, 'timestamp': '2025-09-30 23:10:43.097169', 'step': 6210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.168162', 'step': 6210, 'epoch': 3} {'type': 'loss', 'content': 0.04107385128736496, 'timestamp': '2025-09-30 23:10:43.176889', 'step': 6211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:43.244361', 'step': 6211, 'epoch': 3} {'type': 'loss', 'content': 0.002373811323195696, 'timestamp': '2025-09-30 23:10:43.254521', 'step': 6212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.320071', 'step': 6212, 'epoch': 3} {'type': 'loss', 'content': 0.0004085669352207333, 'timestamp': '2025-09-30 23:10:43.327104', 'step': 6213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.389371', 'step': 6213, 'epoch': 3} {'type': 'loss', 'content': 0.0006172267603687942, 'timestamp': '2025-09-30 23:10:43.395369', 'step': 6214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.460877', 'step': 6214, 'epoch': 3} {'type': 'loss', 'content': 0.021210649982094765, 'timestamp': '2025-09-30 23:10:43.468399', 'step': 6215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:43.542248', 'step': 6215, 'epoch': 3} {'type': 'loss', 'content': 0.030670111998915672, 'timestamp': '2025-09-30 23:10:43.552337', 'step': 6216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.619715', 'step': 6216, 'epoch': 3} {'type': 'loss', 'content': 0.022809209302067757, 'timestamp': '2025-09-30 23:10:43.626115', 'step': 6217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.688869', 'step': 6217, 'epoch': 3} {'type': 'loss', 'content': 0.049270790070295334, 'timestamp': '2025-09-30 23:10:43.696599', 'step': 6218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.763758', 'step': 6218, 'epoch': 3} {'type': 'loss', 'content': 0.010571567341685295, 'timestamp': '2025-09-30 23:10:43.770881', 'step': 6219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.838434', 'step': 6219, 'epoch': 3} {'type': 'loss', 'content': 0.0003018863208126277, 'timestamp': '2025-09-30 23:10:43.849323', 'step': 6220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:43.921719', 'step': 6220, 'epoch': 3} {'type': 'loss', 'content': 0.0006474992842413485, 'timestamp': '2025-09-30 23:10:43.931129', 'step': 6221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:44.000443', 'step': 6221, 'epoch': 3} {'type': 'loss', 'content': 0.004027174785733223, 'timestamp': '2025-09-30 23:10:44.011041', 'step': 6222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:44.076321', 'step': 6222, 'epoch': 3} {'type': 'loss', 'content': 0.00058440410066396, 'timestamp': '2025-09-30 23:10:44.083828', 'step': 6223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:44.148909', 'step': 6223, 'epoch': 3} {'type': 'loss', 'content': 0.033116478472948074, 'timestamp': '2025-09-30 23:10:44.157065', 'step': 6224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:44.220597', 'step': 6224, 'epoch': 3} {'type': 'loss', 'content': 0.017621684819459915, 'timestamp': '2025-09-30 23:10:44.228571', 'step': 6225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:44.296362', 'step': 6225, 'epoch': 3} {'type': 'loss', 'content': 0.0015247439732775092, 'timestamp': '2025-09-30 23:10:44.303396', 'step': 6226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:44.371532', 'step': 6226, 'epoch': 3} {'type': 'loss', 'content': 0.0022286984603852034, 'timestamp': '2025-09-30 23:10:44.380145', 'step': 6227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:44.445116', 'step': 6227, 'epoch': 3} {'type': 'loss', 'content': 0.01495493482798338, 'timestamp': '2025-09-30 23:10:44.456541', 'step': 6228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:44.522993', 'step': 6228, 'epoch': 3} {'type': 'loss', 'content': 0.0008064787252806127, 'timestamp': '2025-09-30 23:10:44.530906', 'step': 6229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:44.596401', 'step': 6229, 'epoch': 3} {'type': 'loss', 'content': 0.0035527406726032495, 'timestamp': '2025-09-30 23:10:44.603914', 'step': 6230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:44.669003', 'step': 6230, 'epoch': 3} {'type': 'loss', 'content': 0.00043569301487877965, 'timestamp': '2025-09-30 23:10:44.676196', 'step': 6231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:44.744728', 'step': 6231, 'epoch': 3} {'type': 'loss', 'content': 0.0003434464742895216, 'timestamp': '2025-09-30 23:10:44.758491', 'step': 6232, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:10:49.264491', 'step': 6232, 'epoch': 3} {'type': 'pplx', 'content': 6784142.825868493, 'timestamp': '2025-09-30 23:10:49.270021', 'step': 6232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:49.337686', 'step': 6232, 'epoch': 3} {'type': 'loss', 'content': 0.029657969251275063, 'timestamp': '2025-09-30 23:10:49.351494', 'step': 6233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:10:49.440566', 'step': 6233, 'epoch': 3} {'type': 'loss', 'content': 0.001077648950740695, 'timestamp': '2025-09-30 23:10:49.453619', 'step': 6234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:49.535994', 'step': 6234, 'epoch': 3} {'type': 'loss', 'content': 0.0001990543823922053, 'timestamp': '2025-09-30 23:10:49.539677', 'step': 6235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:49.604518', 'step': 6235, 'epoch': 3} {'type': 'loss', 'content': 0.0001248013722943142, 'timestamp': '2025-09-30 23:10:49.614828', 'step': 6236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:49.680514', 'step': 6236, 'epoch': 3} {'type': 'loss', 'content': 0.0002706664090510458, 'timestamp': '2025-09-30 23:10:49.686356', 'step': 6237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:49.750066', 'step': 6237, 'epoch': 3} {'type': 'loss', 'content': 0.0005378506029956043, 'timestamp': '2025-09-30 23:10:49.758108', 'step': 6238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:49.828670', 'step': 6238, 'epoch': 3} {'type': 'loss', 'content': 0.00020905118435621262, 'timestamp': '2025-09-30 23:10:49.835066', 'step': 6239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:49.902313', 'step': 6239, 'epoch': 3} {'type': 'loss', 'content': 0.0003662164672277868, 'timestamp': '2025-09-30 23:10:49.914423', 'step': 6240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:49.980759', 'step': 6240, 'epoch': 3} {'type': 'loss', 'content': 0.02534596435725689, 'timestamp': '2025-09-30 23:10:49.988428', 'step': 6241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.052407', 'step': 6241, 'epoch': 3} {'type': 'loss', 'content': 0.0038699486758559942, 'timestamp': '2025-09-30 23:10:50.055995', 'step': 6242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:50.128679', 'step': 6242, 'epoch': 3} {'type': 'loss', 'content': 0.00014221174933481961, 'timestamp': '2025-09-30 23:10:50.136388', 'step': 6243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.207145', 'step': 6243, 'epoch': 3} {'type': 'loss', 'content': 0.0009519042796455324, 'timestamp': '2025-09-30 23:10:50.219442', 'step': 6244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.287211', 'step': 6244, 'epoch': 3} {'type': 'loss', 'content': 0.008966139517724514, 'timestamp': '2025-09-30 23:10:50.295355', 'step': 6245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.362436', 'step': 6245, 'epoch': 3} {'type': 'loss', 'content': 0.0008448067819699645, 'timestamp': '2025-09-30 23:10:50.377381', 'step': 6246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.460661', 'step': 6246, 'epoch': 3} {'type': 'loss', 'content': 0.006453275214880705, 'timestamp': '2025-09-30 23:10:50.464701', 'step': 6247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.529429', 'step': 6247, 'epoch': 3} {'type': 'loss', 'content': 0.0007194023346528411, 'timestamp': '2025-09-30 23:10:50.540908', 'step': 6248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.611584', 'step': 6248, 'epoch': 3} {'type': 'loss', 'content': 0.002528703073039651, 'timestamp': '2025-09-30 23:10:50.616056', 'step': 6249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.685072', 'step': 6249, 'epoch': 3} {'type': 'loss', 'content': 0.012676822021603584, 'timestamp': '2025-09-30 23:10:50.697744', 'step': 6250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.769538', 'step': 6250, 'epoch': 3} {'type': 'loss', 'content': 0.0026660007424652576, 'timestamp': '2025-09-30 23:10:50.780728', 'step': 6251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:50.852117', 'step': 6251, 'epoch': 3} {'type': 'loss', 'content': 0.059536367654800415, 'timestamp': '2025-09-30 23:10:50.863821', 'step': 6252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.930789', 'step': 6252, 'epoch': 3} {'type': 'loss', 'content': 0.013077245093882084, 'timestamp': '2025-09-30 23:10:50.933980', 'step': 6253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:50.995553', 'step': 6253, 'epoch': 3} {'type': 'loss', 'content': 0.006596884690225124, 'timestamp': '2025-09-30 23:10:50.999035', 'step': 6254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.060838', 'step': 6254, 'epoch': 3} {'type': 'loss', 'content': 0.0022262954153120518, 'timestamp': '2025-09-30 23:10:51.068364', 'step': 6255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.132463', 'step': 6255, 'epoch': 3} {'type': 'loss', 'content': 0.00011282953346380964, 'timestamp': '2025-09-30 23:10:51.143224', 'step': 6256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.210622', 'step': 6256, 'epoch': 3} {'type': 'loss', 'content': 0.00592490378767252, 'timestamp': '2025-09-30 23:10:51.216241', 'step': 6257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:51.278084', 'step': 6257, 'epoch': 3} {'type': 'loss', 'content': 0.0007679336704313755, 'timestamp': '2025-09-30 23:10:51.281046', 'step': 6258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.365904', 'step': 6258, 'epoch': 3} {'type': 'loss', 'content': 0.001016707275994122, 'timestamp': '2025-09-30 23:10:51.381003', 'step': 6259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.469012', 'step': 6259, 'epoch': 3} {'type': 'loss', 'content': 0.00029104368877597153, 'timestamp': '2025-09-30 23:10:51.476522', 'step': 6260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.542436', 'step': 6260, 'epoch': 3} {'type': 'loss', 'content': 0.0014459873782470822, 'timestamp': '2025-09-30 23:10:51.547966', 'step': 6261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.610248', 'step': 6261, 'epoch': 3} {'type': 'loss', 'content': 0.0006485211779363453, 'timestamp': '2025-09-30 23:10:51.615252', 'step': 6262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.678700', 'step': 6262, 'epoch': 3} {'type': 'loss', 'content': 0.00019417052681092173, 'timestamp': '2025-09-30 23:10:51.684756', 'step': 6263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.747967', 'step': 6263, 'epoch': 3} {'type': 'loss', 'content': 0.0004960859077982605, 'timestamp': '2025-09-30 23:10:51.768095', 'step': 6264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.835519', 'step': 6264, 'epoch': 3} {'type': 'loss', 'content': 0.01505367737263441, 'timestamp': '2025-09-30 23:10:51.843830', 'step': 6265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.901160', 'step': 6265, 'epoch': 3} {'type': 'loss', 'content': 0.0005311720888130367, 'timestamp': '2025-09-30 23:10:51.911246', 'step': 6266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:51.982195', 'step': 6266, 'epoch': 3} {'type': 'loss', 'content': 0.00017645883781369776, 'timestamp': '2025-09-30 23:10:51.987977', 'step': 6267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:52.053040', 'step': 6267, 'epoch': 3} {'type': 'loss', 'content': 0.002053595380857587, 'timestamp': '2025-09-30 23:10:52.059958', 'step': 6268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.124858', 'step': 6268, 'epoch': 3} {'type': 'loss', 'content': 0.0003948642697650939, 'timestamp': '2025-09-30 23:10:52.130282', 'step': 6269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.194053', 'step': 6269, 'epoch': 3} {'type': 'loss', 'content': 0.0012654305901378393, 'timestamp': '2025-09-30 23:10:52.201779', 'step': 6270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.260786', 'step': 6270, 'epoch': 3} {'type': 'loss', 'content': 0.003507346147671342, 'timestamp': '2025-09-30 23:10:52.266645', 'step': 6271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.332337', 'step': 6271, 'epoch': 3} {'type': 'loss', 'content': 0.0002877027145586908, 'timestamp': '2025-09-30 23:10:52.340427', 'step': 6272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.414769', 'step': 6272, 'epoch': 3} {'type': 'loss', 'content': 0.016307471320033073, 'timestamp': '2025-09-30 23:10:52.426120', 'step': 6273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:52.488057', 'step': 6273, 'epoch': 3} {'type': 'loss', 'content': 0.03632018342614174, 'timestamp': '2025-09-30 23:10:52.499850', 'step': 6274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.566428', 'step': 6274, 'epoch': 3} {'type': 'loss', 'content': 0.009446459822356701, 'timestamp': '2025-09-30 23:10:52.571073', 'step': 6275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.643747', 'step': 6275, 'epoch': 3} {'type': 'loss', 'content': 0.004558125045150518, 'timestamp': '2025-09-30 23:10:52.655967', 'step': 6276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.720376', 'step': 6276, 'epoch': 3} {'type': 'loss', 'content': 0.007741756737232208, 'timestamp': '2025-09-30 23:10:52.725874', 'step': 6277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:52.787649', 'step': 6277, 'epoch': 3} {'type': 'loss', 'content': 0.00035169199691154063, 'timestamp': '2025-09-30 23:10:52.794290', 'step': 6278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.855367', 'step': 6278, 'epoch': 3} {'type': 'loss', 'content': 0.0011580362915992737, 'timestamp': '2025-09-30 23:10:52.862312', 'step': 6279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:52.940507', 'step': 6279, 'epoch': 3} {'type': 'loss', 'content': 7.842414925107732e-05, 'timestamp': '2025-09-30 23:10:52.963238', 'step': 6280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:53.026413', 'step': 6280, 'epoch': 3} {'type': 'loss', 'content': 0.003099729772657156, 'timestamp': '2025-09-30 23:10:53.031609', 'step': 6281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.092234', 'step': 6281, 'epoch': 3} {'type': 'loss', 'content': 0.004029258154332638, 'timestamp': '2025-09-30 23:10:53.096323', 'step': 6282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.157830', 'step': 6282, 'epoch': 3} {'type': 'loss', 'content': 0.006575337145477533, 'timestamp': '2025-09-30 23:10:53.162715', 'step': 6283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.219759', 'step': 6283, 'epoch': 3} {'type': 'loss', 'content': 0.00011522588465595618, 'timestamp': '2025-09-30 23:10:53.226018', 'step': 6284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:53.282512', 'step': 6284, 'epoch': 3} {'type': 'loss', 'content': 0.00210801069624722, 'timestamp': '2025-09-30 23:10:53.292072', 'step': 6285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.358374', 'step': 6285, 'epoch': 3} {'type': 'loss', 'content': 0.0038987770676612854, 'timestamp': '2025-09-30 23:10:53.360912', 'step': 6286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.420049', 'step': 6286, 'epoch': 3} {'type': 'loss', 'content': 0.0001797329168766737, 'timestamp': '2025-09-30 23:10:53.424556', 'step': 6287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.481947', 'step': 6287, 'epoch': 3} {'type': 'loss', 'content': 0.009983996860682964, 'timestamp': '2025-09-30 23:10:53.489121', 'step': 6288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.546503', 'step': 6288, 'epoch': 3} {'type': 'loss', 'content': 0.0003826339670922607, 'timestamp': '2025-09-30 23:10:53.550424', 'step': 6289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.609907', 'step': 6289, 'epoch': 3} {'type': 'loss', 'content': 0.0008662755717523396, 'timestamp': '2025-09-30 23:10:53.614441', 'step': 6290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.673575', 'step': 6290, 'epoch': 3} {'type': 'loss', 'content': 0.022345472127199173, 'timestamp': '2025-09-30 23:10:53.677585', 'step': 6291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.734259', 'step': 6291, 'epoch': 3} {'type': 'loss', 'content': 0.0022990235593169928, 'timestamp': '2025-09-30 23:10:53.740869', 'step': 6292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.807561', 'step': 6292, 'epoch': 3} {'type': 'loss', 'content': 0.00166352151427418, 'timestamp': '2025-09-30 23:10:53.811088', 'step': 6293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:53.876603', 'step': 6293, 'epoch': 3} {'type': 'loss', 'content': 7.14385969331488e-05, 'timestamp': '2025-09-30 23:10:53.879313', 'step': 6294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:53.942074', 'step': 6294, 'epoch': 3} {'type': 'loss', 'content': 0.04192036762833595, 'timestamp': '2025-09-30 23:10:53.945003', 'step': 6295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.002534', 'step': 6295, 'epoch': 3} {'type': 'loss', 'content': 0.0007747958297841251, 'timestamp': '2025-09-30 23:10:54.011757', 'step': 6296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.073297', 'step': 6296, 'epoch': 3} {'type': 'loss', 'content': 0.0023105719592422247, 'timestamp': '2025-09-30 23:10:54.078888', 'step': 6297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.141340', 'step': 6297, 'epoch': 3} {'type': 'loss', 'content': 0.03192998468875885, 'timestamp': '2025-09-30 23:10:54.145360', 'step': 6298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.203582', 'step': 6298, 'epoch': 3} {'type': 'loss', 'content': 0.03999996930360794, 'timestamp': '2025-09-30 23:10:54.208477', 'step': 6299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.269072', 'step': 6299, 'epoch': 3} {'type': 'loss', 'content': 0.0014140954008325934, 'timestamp': '2025-09-30 23:10:54.279491', 'step': 6300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.340061', 'step': 6300, 'epoch': 3} {'type': 'loss', 'content': 0.00037910696119070053, 'timestamp': '2025-09-30 23:10:54.346937', 'step': 6301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:54.414309', 'step': 6301, 'epoch': 3} {'type': 'loss', 'content': 0.022701380774378777, 'timestamp': '2025-09-30 23:10:54.419947', 'step': 6302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.480240', 'step': 6302, 'epoch': 3} {'type': 'loss', 'content': 0.00021251593716442585, 'timestamp': '2025-09-30 23:10:54.483173', 'step': 6303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.544697', 'step': 6303, 'epoch': 3} {'type': 'loss', 'content': 0.00012967827206011862, 'timestamp': '2025-09-30 23:10:54.551539', 'step': 6304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.617804', 'step': 6304, 'epoch': 3} {'type': 'loss', 'content': 0.028176551684737206, 'timestamp': '2025-09-30 23:10:54.623291', 'step': 6305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.685405', 'step': 6305, 'epoch': 3} {'type': 'loss', 'content': 0.0006769594619981945, 'timestamp': '2025-09-30 23:10:54.693323', 'step': 6306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.763643', 'step': 6306, 'epoch': 3} {'type': 'loss', 'content': 0.0003716196515597403, 'timestamp': '2025-09-30 23:10:54.768606', 'step': 6307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.837344', 'step': 6307, 'epoch': 3} {'type': 'loss', 'content': 0.0007442356436513364, 'timestamp': '2025-09-30 23:10:54.847448', 'step': 6308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.906479', 'step': 6308, 'epoch': 3} {'type': 'loss', 'content': 0.00478005176410079, 'timestamp': '2025-09-30 23:10:54.909646', 'step': 6309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:54.966937', 'step': 6309, 'epoch': 3} {'type': 'loss', 'content': 7.43292475817725e-05, 'timestamp': '2025-09-30 23:10:54.974085', 'step': 6310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.031112', 'step': 6310, 'epoch': 3} {'type': 'loss', 'content': 0.0005819229409098625, 'timestamp': '2025-09-30 23:10:55.039780', 'step': 6311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.107057', 'step': 6311, 'epoch': 3} {'type': 'loss', 'content': 0.0002568906347732991, 'timestamp': '2025-09-30 23:10:55.117770', 'step': 6312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:55.185803', 'step': 6312, 'epoch': 3} {'type': 'loss', 'content': 0.00030063153826631606, 'timestamp': '2025-09-30 23:10:55.190072', 'step': 6313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.252754', 'step': 6313, 'epoch': 3} {'type': 'loss', 'content': 0.06404296308755875, 'timestamp': '2025-09-30 23:10:55.261006', 'step': 6314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.339366', 'step': 6314, 'epoch': 3} {'type': 'loss', 'content': 0.04521494358778, 'timestamp': '2025-09-30 23:10:55.346591', 'step': 6315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.416478', 'step': 6315, 'epoch': 3} {'type': 'loss', 'content': 0.0007242756546474993, 'timestamp': '2025-09-30 23:10:55.423778', 'step': 6316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.483084', 'step': 6316, 'epoch': 3} {'type': 'loss', 'content': 0.013337320648133755, 'timestamp': '2025-09-30 23:10:55.485934', 'step': 6317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:55.544137', 'step': 6317, 'epoch': 3} {'type': 'loss', 'content': 0.00306688086129725, 'timestamp': '2025-09-30 23:10:55.548121', 'step': 6318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:55.614238', 'step': 6318, 'epoch': 3} {'type': 'loss', 'content': 0.00014434538024943322, 'timestamp': '2025-09-30 23:10:55.617111', 'step': 6319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.681831', 'step': 6319, 'epoch': 3} {'type': 'loss', 'content': 0.0029323280323296785, 'timestamp': '2025-09-30 23:10:55.693724', 'step': 6320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.760746', 'step': 6320, 'epoch': 3} {'type': 'loss', 'content': 0.050451818853616714, 'timestamp': '2025-09-30 23:10:55.777836', 'step': 6321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.844505', 'step': 6321, 'epoch': 3} {'type': 'loss', 'content': 0.0013023900100961328, 'timestamp': '2025-09-30 23:10:55.849835', 'step': 6322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.914096', 'step': 6322, 'epoch': 3} {'type': 'loss', 'content': 0.011512587778270245, 'timestamp': '2025-09-30 23:10:55.920176', 'step': 6323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:55.982913', 'step': 6323, 'epoch': 3} {'type': 'loss', 'content': 0.0003733176563400775, 'timestamp': '2025-09-30 23:10:55.996358', 'step': 6324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.061710', 'step': 6324, 'epoch': 3} {'type': 'loss', 'content': 0.00023928321024868637, 'timestamp': '2025-09-30 23:10:56.068515', 'step': 6325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.142995', 'step': 6325, 'epoch': 3} {'type': 'loss', 'content': 0.022687744349241257, 'timestamp': '2025-09-30 23:10:56.146304', 'step': 6326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.211213', 'step': 6326, 'epoch': 3} {'type': 'loss', 'content': 0.0004817892622668296, 'timestamp': '2025-09-30 23:10:56.217933', 'step': 6327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:56.287922', 'step': 6327, 'epoch': 3} {'type': 'loss', 'content': 0.0063990936614573, 'timestamp': '2025-09-30 23:10:56.299755', 'step': 6328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.368324', 'step': 6328, 'epoch': 3} {'type': 'loss', 'content': 0.0031221737153828144, 'timestamp': '2025-09-30 23:10:56.377633', 'step': 6329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.437515', 'step': 6329, 'epoch': 3} {'type': 'loss', 'content': 0.003288816660642624, 'timestamp': '2025-09-30 23:10:56.447198', 'step': 6330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.518157', 'step': 6330, 'epoch': 3} {'type': 'loss', 'content': 0.00039096263935789466, 'timestamp': '2025-09-30 23:10:56.525647', 'step': 6331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.594024', 'step': 6331, 'epoch': 3} {'type': 'loss', 'content': 0.023575594648718834, 'timestamp': '2025-09-30 23:10:56.605809', 'step': 6332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:56.668757', 'step': 6332, 'epoch': 3} {'type': 'loss', 'content': 0.0016598334768787026, 'timestamp': '2025-09-30 23:10:56.675920', 'step': 6333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.739182', 'step': 6333, 'epoch': 3} {'type': 'loss', 'content': 0.027642739936709404, 'timestamp': '2025-09-30 23:10:56.745243', 'step': 6334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.810747', 'step': 6334, 'epoch': 3} {'type': 'loss', 'content': 0.0014942217385396361, 'timestamp': '2025-09-30 23:10:56.814883', 'step': 6335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.875385', 'step': 6335, 'epoch': 3} {'type': 'loss', 'content': 0.0046939002349972725, 'timestamp': '2025-09-30 23:10:56.884819', 'step': 6336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:56.946498', 'step': 6336, 'epoch': 3} {'type': 'loss', 'content': 0.0010710329515859485, 'timestamp': '2025-09-30 23:10:56.951411', 'step': 6337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:57.016939', 'step': 6337, 'epoch': 3} {'type': 'loss', 'content': 0.06209411472082138, 'timestamp': '2025-09-30 23:10:57.022507', 'step': 6338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.087765', 'step': 6338, 'epoch': 3} {'type': 'loss', 'content': 0.006501018535345793, 'timestamp': '2025-09-30 23:10:57.095071', 'step': 6339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.156075', 'step': 6339, 'epoch': 3} {'type': 'loss', 'content': 0.00706512900069356, 'timestamp': '2025-09-30 23:10:57.164577', 'step': 6340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.229779', 'step': 6340, 'epoch': 3} {'type': 'loss', 'content': 0.00037944523501209915, 'timestamp': '2025-09-30 23:10:57.232680', 'step': 6341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.291896', 'step': 6341, 'epoch': 3} {'type': 'loss', 'content': 0.008459818549454212, 'timestamp': '2025-09-30 23:10:57.298648', 'step': 6342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.360544', 'step': 6342, 'epoch': 3} {'type': 'loss', 'content': 0.00047081636148504913, 'timestamp': '2025-09-30 23:10:57.370171', 'step': 6343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:57.441464', 'step': 6343, 'epoch': 3} {'type': 'loss', 'content': 0.06159288436174393, 'timestamp': '2025-09-30 23:10:57.448927', 'step': 6344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.518979', 'step': 6344, 'epoch': 3} {'type': 'loss', 'content': 0.00961264967918396, 'timestamp': '2025-09-30 23:10:57.523626', 'step': 6345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.594569', 'step': 6345, 'epoch': 3} {'type': 'loss', 'content': 0.006640516221523285, 'timestamp': '2025-09-30 23:10:57.598509', 'step': 6346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.663301', 'step': 6346, 'epoch': 3} {'type': 'loss', 'content': 0.0008666656212881207, 'timestamp': '2025-09-30 23:10:57.671561', 'step': 6347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:57.740027', 'step': 6347, 'epoch': 3} {'type': 'loss', 'content': 0.0004956416669301689, 'timestamp': '2025-09-30 23:10:57.751349', 'step': 6348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.825170', 'step': 6348, 'epoch': 3} {'type': 'loss', 'content': 0.000434366287663579, 'timestamp': '2025-09-30 23:10:57.835031', 'step': 6349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:57.903467', 'step': 6349, 'epoch': 3} {'type': 'loss', 'content': 0.0026041478849947453, 'timestamp': '2025-09-30 23:10:57.907161', 'step': 6350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:57.981957', 'step': 6350, 'epoch': 3} {'type': 'loss', 'content': 0.0007275535026565194, 'timestamp': '2025-09-30 23:10:57.985275', 'step': 6351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.064599', 'step': 6351, 'epoch': 3} {'type': 'loss', 'content': 0.0022985930554568768, 'timestamp': '2025-09-30 23:10:58.076425', 'step': 6352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.139821', 'step': 6352, 'epoch': 3} {'type': 'loss', 'content': 0.0009090664098039269, 'timestamp': '2025-09-30 23:10:58.146747', 'step': 6353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.210266', 'step': 6353, 'epoch': 3} {'type': 'loss', 'content': 0.017027078196406364, 'timestamp': '2025-09-30 23:10:58.218257', 'step': 6354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.287739', 'step': 6354, 'epoch': 3} {'type': 'loss', 'content': 0.0003301123797427863, 'timestamp': '2025-09-30 23:10:58.295536', 'step': 6355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:10:58.362415', 'step': 6355, 'epoch': 3} {'type': 'loss', 'content': 0.004294952843338251, 'timestamp': '2025-09-30 23:10:58.372572', 'step': 6356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.445880', 'step': 6356, 'epoch': 3} {'type': 'loss', 'content': 0.014056636951863766, 'timestamp': '2025-09-30 23:10:58.449081', 'step': 6357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.507431', 'step': 6357, 'epoch': 3} {'type': 'loss', 'content': 0.013672932982444763, 'timestamp': '2025-09-30 23:10:58.510064', 'step': 6358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.575371', 'step': 6358, 'epoch': 3} {'type': 'loss', 'content': 0.00028535444289445877, 'timestamp': '2025-09-30 23:10:58.582637', 'step': 6359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:58.644644', 'step': 6359, 'epoch': 3} {'type': 'loss', 'content': 0.0006077264552004635, 'timestamp': '2025-09-30 23:10:58.659475', 'step': 6360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.724481', 'step': 6360, 'epoch': 3} {'type': 'loss', 'content': 0.0009362126002088189, 'timestamp': '2025-09-30 23:10:58.732006', 'step': 6361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.796325', 'step': 6361, 'epoch': 3} {'type': 'loss', 'content': 0.008470283821225166, 'timestamp': '2025-09-30 23:10:58.799059', 'step': 6362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:58.855319', 'step': 6362, 'epoch': 3} {'type': 'loss', 'content': 0.018010277301073074, 'timestamp': '2025-09-30 23:10:58.864391', 'step': 6363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:58.926799', 'step': 6363, 'epoch': 3} {'type': 'loss', 'content': 0.0005847433349117637, 'timestamp': '2025-09-30 23:10:58.933651', 'step': 6364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.002983', 'step': 6364, 'epoch': 3} {'type': 'loss', 'content': 0.0023043458350002766, 'timestamp': '2025-09-30 23:10:59.020113', 'step': 6365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.090618', 'step': 6365, 'epoch': 3} {'type': 'loss', 'content': 0.02971435897052288, 'timestamp': '2025-09-30 23:10:59.093262', 'step': 6366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.155616', 'step': 6366, 'epoch': 3} {'type': 'loss', 'content': 0.026228507980704308, 'timestamp': '2025-09-30 23:10:59.162441', 'step': 6367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.226827', 'step': 6367, 'epoch': 3} {'type': 'loss', 'content': 0.001228566630743444, 'timestamp': '2025-09-30 23:10:59.238335', 'step': 6368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.310376', 'step': 6368, 'epoch': 3} {'type': 'loss', 'content': 0.00027222931385040283, 'timestamp': '2025-09-30 23:10:59.316925', 'step': 6369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:10:59.388884', 'step': 6369, 'epoch': 3} {'type': 'loss', 'content': 0.0027542293537408113, 'timestamp': '2025-09-30 23:10:59.395833', 'step': 6370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.465891', 'step': 6370, 'epoch': 3} {'type': 'loss', 'content': 0.0005063946009613574, 'timestamp': '2025-09-30 23:10:59.472200', 'step': 6371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.547764', 'step': 6371, 'epoch': 3} {'type': 'loss', 'content': 0.0007940270006656647, 'timestamp': '2025-09-30 23:10:59.556719', 'step': 6372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.633989', 'step': 6372, 'epoch': 3} {'type': 'loss', 'content': 0.01680249534547329, 'timestamp': '2025-09-30 23:10:59.645188', 'step': 6373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.703506', 'step': 6373, 'epoch': 3} {'type': 'loss', 'content': 0.0016505880048498511, 'timestamp': '2025-09-30 23:10:59.707533', 'step': 6374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.786140', 'step': 6374, 'epoch': 3} {'type': 'loss', 'content': 0.023542284965515137, 'timestamp': '2025-09-30 23:10:59.798140', 'step': 6375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.881394', 'step': 6375, 'epoch': 3} {'type': 'loss', 'content': 0.0020213613752275705, 'timestamp': '2025-09-30 23:10:59.894609', 'step': 6376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:10:59.963855', 'step': 6376, 'epoch': 3} {'type': 'loss', 'content': 0.0026294682174921036, 'timestamp': '2025-09-30 23:10:59.967193', 'step': 6377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:00.028397', 'step': 6377, 'epoch': 3} {'type': 'loss', 'content': 0.0013898719334974885, 'timestamp': '2025-09-30 23:11:00.035918', 'step': 6378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:00.109132', 'step': 6378, 'epoch': 3} {'type': 'loss', 'content': 0.0015421932330355048, 'timestamp': '2025-09-30 23:11:00.113595', 'step': 6379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:00.175917', 'step': 6379, 'epoch': 3} {'type': 'loss', 'content': 0.001999929780140519, 'timestamp': '2025-09-30 23:11:00.184785', 'step': 6380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:00.243601', 'step': 6380, 'epoch': 3} {'type': 'loss', 'content': 0.006439823191612959, 'timestamp': '2025-09-30 23:11:00.249416', 'step': 6381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:00.318053', 'step': 6381, 'epoch': 3} {'type': 'loss', 'content': 0.044804830104112625, 'timestamp': '2025-09-30 23:11:00.323975', 'step': 6382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:00.387461', 'step': 6382, 'epoch': 3} {'type': 'loss', 'content': 0.003994728904217482, 'timestamp': '2025-09-30 23:11:00.392879', 'step': 6383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:00.457595', 'step': 6383, 'epoch': 3} {'type': 'loss', 'content': 0.025498032569885254, 'timestamp': '2025-09-30 23:11:00.465886', 'step': 6384, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:11:05.701522', 'step': 6384, 'epoch': 3} {'type': 'pplx', 'content': 7481230.95657432, 'timestamp': '2025-09-30 23:11:05.707680', 'step': 6384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:05.764277', 'step': 6384, 'epoch': 3} {'type': 'loss', 'content': 0.012879753485321999, 'timestamp': '2025-09-30 23:11:05.769102', 'step': 6385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:05.828805', 'step': 6385, 'epoch': 3} {'type': 'loss', 'content': 0.041176505386829376, 'timestamp': '2025-09-30 23:11:05.833460', 'step': 6386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:05.895264', 'step': 6386, 'epoch': 3} {'type': 'loss', 'content': 0.0005373024614527822, 'timestamp': '2025-09-30 23:11:05.900701', 'step': 6387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:05.961431', 'step': 6387, 'epoch': 3} {'type': 'loss', 'content': 0.00030050924397073686, 'timestamp': '2025-09-30 23:11:05.971441', 'step': 6388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.035236', 'step': 6388, 'epoch': 3} {'type': 'loss', 'content': 0.0015252131270244718, 'timestamp': '2025-09-30 23:11:06.039482', 'step': 6389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.096009', 'step': 6389, 'epoch': 3} {'type': 'loss', 'content': 0.0009364594588987529, 'timestamp': '2025-09-30 23:11:06.100054', 'step': 6390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.157682', 'step': 6390, 'epoch': 3} {'type': 'loss', 'content': 0.012036927975714207, 'timestamp': '2025-09-30 23:11:06.160619', 'step': 6391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.215287', 'step': 6391, 'epoch': 3} {'type': 'loss', 'content': 0.02181861363351345, 'timestamp': '2025-09-30 23:11:06.222134', 'step': 6392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.276888', 'step': 6392, 'epoch': 3} {'type': 'loss', 'content': 0.0025601580273360014, 'timestamp': '2025-09-30 23:11:06.280363', 'step': 6393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.335310', 'step': 6393, 'epoch': 3} {'type': 'loss', 'content': 0.0011461821850389242, 'timestamp': '2025-09-30 23:11:06.343321', 'step': 6394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:06.413895', 'step': 6394, 'epoch': 3} {'type': 'loss', 'content': 0.001436161925084889, 'timestamp': '2025-09-30 23:11:06.421860', 'step': 6395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.492010', 'step': 6395, 'epoch': 3} {'type': 'loss', 'content': 0.001958841225132346, 'timestamp': '2025-09-30 23:11:06.500251', 'step': 6396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.565286', 'step': 6396, 'epoch': 3} {'type': 'loss', 'content': 0.0037996957544237375, 'timestamp': '2025-09-30 23:11:06.571714', 'step': 6397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.641059', 'step': 6397, 'epoch': 3} {'type': 'loss', 'content': 0.0474431999027729, 'timestamp': '2025-09-30 23:11:06.646679', 'step': 6398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.701338', 'step': 6398, 'epoch': 3} {'type': 'loss', 'content': 0.020141473039984703, 'timestamp': '2025-09-30 23:11:06.707013', 'step': 6399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.767003', 'step': 6399, 'epoch': 3} {'type': 'loss', 'content': 0.02626052126288414, 'timestamp': '2025-09-30 23:11:06.773126', 'step': 6400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:06.831145', 'step': 6400, 'epoch': 3} {'type': 'loss', 'content': 0.0020863653626292944, 'timestamp': '2025-09-30 23:11:06.834465', 'step': 6401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.894680', 'step': 6401, 'epoch': 3} {'type': 'loss', 'content': 0.007288514636456966, 'timestamp': '2025-09-30 23:11:06.900765', 'step': 6402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:06.962211', 'step': 6402, 'epoch': 3} {'type': 'loss', 'content': 0.009204343892633915, 'timestamp': '2025-09-30 23:11:06.967539', 'step': 6403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:07.032593', 'step': 6403, 'epoch': 3} {'type': 'loss', 'content': 0.002989352447912097, 'timestamp': '2025-09-30 23:11:07.040358', 'step': 6404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.096410', 'step': 6404, 'epoch': 3} {'type': 'loss', 'content': 0.0014994710218161345, 'timestamp': '2025-09-30 23:11:07.100520', 'step': 6405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.157440', 'step': 6405, 'epoch': 3} {'type': 'loss', 'content': 0.014862634241580963, 'timestamp': '2025-09-30 23:11:07.160132', 'step': 6406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.227160', 'step': 6406, 'epoch': 3} {'type': 'loss', 'content': 0.0006779864197596908, 'timestamp': '2025-09-30 23:11:07.234065', 'step': 6407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.300121', 'step': 6407, 'epoch': 3} {'type': 'loss', 'content': 0.003491069423034787, 'timestamp': '2025-09-30 23:11:07.310254', 'step': 6408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.376533', 'step': 6408, 'epoch': 3} {'type': 'loss', 'content': 0.0028864832129329443, 'timestamp': '2025-09-30 23:11:07.381442', 'step': 6409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.447056', 'step': 6409, 'epoch': 3} {'type': 'loss', 'content': 0.0007823928608559072, 'timestamp': '2025-09-30 23:11:07.451805', 'step': 6410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.512327', 'step': 6410, 'epoch': 3} {'type': 'loss', 'content': 0.010038315318524837, 'timestamp': '2025-09-30 23:11:07.518003', 'step': 6411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:07.575657', 'step': 6411, 'epoch': 3} {'type': 'loss', 'content': 0.002272688550874591, 'timestamp': '2025-09-30 23:11:07.584284', 'step': 6412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:07.652351', 'step': 6412, 'epoch': 3} {'type': 'loss', 'content': 0.023646635934710503, 'timestamp': '2025-09-30 23:11:07.656236', 'step': 6413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.716563', 'step': 6413, 'epoch': 3} {'type': 'loss', 'content': 0.002119517419487238, 'timestamp': '2025-09-30 23:11:07.720169', 'step': 6414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.779562', 'step': 6414, 'epoch': 3} {'type': 'loss', 'content': 0.00021129929518792778, 'timestamp': '2025-09-30 23:11:07.782761', 'step': 6415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.864405', 'step': 6415, 'epoch': 3} {'type': 'loss', 'content': 0.005711239296942949, 'timestamp': '2025-09-30 23:11:07.870677', 'step': 6416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.926984', 'step': 6416, 'epoch': 3} {'type': 'loss', 'content': 0.001891844323836267, 'timestamp': '2025-09-30 23:11:07.937070', 'step': 6417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:07.997427', 'step': 6417, 'epoch': 3} {'type': 'loss', 'content': 0.0037650312297046185, 'timestamp': '2025-09-30 23:11:08.002113', 'step': 6418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.074755', 'step': 6418, 'epoch': 3} {'type': 'loss', 'content': 0.03376536816358566, 'timestamp': '2025-09-30 23:11:08.088829', 'step': 6419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.169517', 'step': 6419, 'epoch': 3} {'type': 'loss', 'content': 0.0031190509907901287, 'timestamp': '2025-09-30 23:11:08.178411', 'step': 6420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.258043', 'step': 6420, 'epoch': 3} {'type': 'loss', 'content': 0.0008534484659321606, 'timestamp': '2025-09-30 23:11:08.261290', 'step': 6421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:08.323685', 'step': 6421, 'epoch': 3} {'type': 'loss', 'content': 0.0017396981129422784, 'timestamp': '2025-09-30 23:11:08.326768', 'step': 6422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.395496', 'step': 6422, 'epoch': 3} {'type': 'loss', 'content': 0.0031330601777881384, 'timestamp': '2025-09-30 23:11:08.401703', 'step': 6423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.468633', 'step': 6423, 'epoch': 3} {'type': 'loss', 'content': 0.0024383559357374907, 'timestamp': '2025-09-30 23:11:08.477698', 'step': 6424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.531360', 'step': 6424, 'epoch': 3} {'type': 'loss', 'content': 0.014284031465649605, 'timestamp': '2025-09-30 23:11:08.538494', 'step': 6425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:08.603786', 'step': 6425, 'epoch': 3} {'type': 'loss', 'content': 0.0062712510116398335, 'timestamp': '2025-09-30 23:11:08.612223', 'step': 6426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.675390', 'step': 6426, 'epoch': 3} {'type': 'loss', 'content': 0.02449570596218109, 'timestamp': '2025-09-30 23:11:08.677946', 'step': 6427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.738808', 'step': 6427, 'epoch': 3} {'type': 'loss', 'content': 0.003241569036617875, 'timestamp': '2025-09-30 23:11:08.746885', 'step': 6428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:08.805370', 'step': 6428, 'epoch': 3} {'type': 'loss', 'content': 0.00029038841603323817, 'timestamp': '2025-09-30 23:11:08.811018', 'step': 6429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.871399', 'step': 6429, 'epoch': 3} {'type': 'loss', 'content': 0.02674853429198265, 'timestamp': '2025-09-30 23:11:08.874738', 'step': 6430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:08.939942', 'step': 6430, 'epoch': 3} {'type': 'loss', 'content': 0.00048473948845639825, 'timestamp': '2025-09-30 23:11:08.946771', 'step': 6431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.011184', 'step': 6431, 'epoch': 3} {'type': 'loss', 'content': 0.00024893131921999156, 'timestamp': '2025-09-30 23:11:09.023346', 'step': 6432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:09.090754', 'step': 6432, 'epoch': 3} {'type': 'loss', 'content': 0.008207685314118862, 'timestamp': '2025-09-30 23:11:09.098963', 'step': 6433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.161619', 'step': 6433, 'epoch': 3} {'type': 'loss', 'content': 0.0017684856429696083, 'timestamp': '2025-09-30 23:11:09.167481', 'step': 6434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.241094', 'step': 6434, 'epoch': 3} {'type': 'loss', 'content': 0.001437133876606822, 'timestamp': '2025-09-30 23:11:09.244039', 'step': 6435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 2880017550912.0}, 'timestamp': '2025-09-30 23:11:09.303899', 'step': 6435, 'epoch': 3} {'type': 'loss', 'content': 0.011187187395989895, 'timestamp': '2025-09-30 23:11:09.313258', 'step': 6436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:09.378978', 'step': 6436, 'epoch': 3} {'type': 'loss', 'content': 0.0045640128664672375, 'timestamp': '2025-09-30 23:11:09.381797', 'step': 6437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.441412', 'step': 6437, 'epoch': 3} {'type': 'loss', 'content': 0.0003404112649150193, 'timestamp': '2025-09-30 23:11:09.444950', 'step': 6438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.513131', 'step': 6438, 'epoch': 3} {'type': 'loss', 'content': 0.0006182377110235393, 'timestamp': '2025-09-30 23:11:09.518863', 'step': 6439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.576853', 'step': 6439, 'epoch': 3} {'type': 'loss', 'content': 0.0013038933975622058, 'timestamp': '2025-09-30 23:11:09.584709', 'step': 6440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.642185', 'step': 6440, 'epoch': 3} {'type': 'loss', 'content': 0.004970483481884003, 'timestamp': '2025-09-30 23:11:09.649961', 'step': 6441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.721922', 'step': 6441, 'epoch': 3} {'type': 'loss', 'content': 0.001864360412582755, 'timestamp': '2025-09-30 23:11:09.726006', 'step': 6442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.793725', 'step': 6442, 'epoch': 3} {'type': 'loss', 'content': 0.01730223558843136, 'timestamp': '2025-09-30 23:11:09.797479', 'step': 6443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:09.873139', 'step': 6443, 'epoch': 3} {'type': 'loss', 'content': 0.001823146129027009, 'timestamp': '2025-09-30 23:11:09.882067', 'step': 6444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:09.960208', 'step': 6444, 'epoch': 3} {'type': 'loss', 'content': 0.03491317108273506, 'timestamp': '2025-09-30 23:11:09.966013', 'step': 6445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:10.030817', 'step': 6445, 'epoch': 3} {'type': 'loss', 'content': 0.0014931437326595187, 'timestamp': '2025-09-30 23:11:10.042055', 'step': 6446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.113786', 'step': 6446, 'epoch': 3} {'type': 'loss', 'content': 0.0006622665678150952, 'timestamp': '2025-09-30 23:11:10.117965', 'step': 6447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.181127', 'step': 6447, 'epoch': 3} {'type': 'loss', 'content': 0.03635173663496971, 'timestamp': '2025-09-30 23:11:10.189846', 'step': 6448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.255004', 'step': 6448, 'epoch': 3} {'type': 'loss', 'content': 0.016635973006486893, 'timestamp': '2025-09-30 23:11:10.257984', 'step': 6449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:10.321751', 'step': 6449, 'epoch': 3} {'type': 'loss', 'content': 0.0004547389398794621, 'timestamp': '2025-09-30 23:11:10.326821', 'step': 6450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:10.398399', 'step': 6450, 'epoch': 3} {'type': 'loss', 'content': 0.008580869995057583, 'timestamp': '2025-09-30 23:11:10.401494', 'step': 6451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.480620', 'step': 6451, 'epoch': 3} {'type': 'loss', 'content': 0.016822706907987595, 'timestamp': '2025-09-30 23:11:10.491403', 'step': 6452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.565055', 'step': 6452, 'epoch': 3} {'type': 'loss', 'content': 0.0011653645196929574, 'timestamp': '2025-09-30 23:11:10.568637', 'step': 6453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.626334', 'step': 6453, 'epoch': 3} {'type': 'loss', 'content': 0.017013603821396828, 'timestamp': '2025-09-30 23:11:10.630060', 'step': 6454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.693770', 'step': 6454, 'epoch': 3} {'type': 'loss', 'content': 0.000385571998776868, 'timestamp': '2025-09-30 23:11:10.701463', 'step': 6455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.763818', 'step': 6455, 'epoch': 3} {'type': 'loss', 'content': 0.00032826419919729233, 'timestamp': '2025-09-30 23:11:10.770952', 'step': 6456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.838691', 'step': 6456, 'epoch': 3} {'type': 'loss', 'content': 0.0013520994689315557, 'timestamp': '2025-09-30 23:11:10.847710', 'step': 6457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:10.908146', 'step': 6457, 'epoch': 3} {'type': 'loss', 'content': 0.0503493957221508, 'timestamp': '2025-09-30 23:11:10.911481', 'step': 6458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:10.981280', 'step': 6458, 'epoch': 3} {'type': 'loss', 'content': 0.003483231645077467, 'timestamp': '2025-09-30 23:11:10.984462', 'step': 6459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:11.042872', 'step': 6459, 'epoch': 3} {'type': 'loss', 'content': 0.0071947271935641766, 'timestamp': '2025-09-30 23:11:11.052461', 'step': 6460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.119299', 'step': 6460, 'epoch': 3} {'type': 'loss', 'content': 0.014321289956569672, 'timestamp': '2025-09-30 23:11:11.127894', 'step': 6461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.194422', 'step': 6461, 'epoch': 3} {'type': 'loss', 'content': 0.03038560412824154, 'timestamp': '2025-09-30 23:11:11.198476', 'step': 6462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.266874', 'step': 6462, 'epoch': 3} {'type': 'loss', 'content': 0.023252740502357483, 'timestamp': '2025-09-30 23:11:11.273000', 'step': 6463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.335047', 'step': 6463, 'epoch': 3} {'type': 'loss', 'content': 0.008751134388148785, 'timestamp': '2025-09-30 23:11:11.343046', 'step': 6464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.408379', 'step': 6464, 'epoch': 3} {'type': 'loss', 'content': 0.0006235718028619885, 'timestamp': '2025-09-30 23:11:11.416124', 'step': 6465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.470129', 'step': 6465, 'epoch': 3} {'type': 'loss', 'content': 0.010754753835499287, 'timestamp': '2025-09-30 23:11:11.473475', 'step': 6466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:11.529605', 'step': 6466, 'epoch': 3} {'type': 'loss', 'content': 0.010577455163002014, 'timestamp': '2025-09-30 23:11:11.533247', 'step': 6467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.592130', 'step': 6467, 'epoch': 3} {'type': 'loss', 'content': 0.0006005087052471936, 'timestamp': '2025-09-30 23:11:11.599151', 'step': 6468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.654404', 'step': 6468, 'epoch': 3} {'type': 'loss', 'content': 0.005042628850787878, 'timestamp': '2025-09-30 23:11:11.658983', 'step': 6469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.716059', 'step': 6469, 'epoch': 3} {'type': 'loss', 'content': 0.0009544029599055648, 'timestamp': '2025-09-30 23:11:11.720816', 'step': 6470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:11.777025', 'step': 6470, 'epoch': 3} {'type': 'loss', 'content': 0.0002551176876295358, 'timestamp': '2025-09-30 23:11:11.780398', 'step': 6471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.837537', 'step': 6471, 'epoch': 3} {'type': 'loss', 'content': 0.0002109520573867485, 'timestamp': '2025-09-30 23:11:11.844146', 'step': 6472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:11.900227', 'step': 6472, 'epoch': 3} {'type': 'loss', 'content': 0.012779371812939644, 'timestamp': '2025-09-30 23:11:11.904475', 'step': 6473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:11.960498', 'step': 6473, 'epoch': 3} {'type': 'loss', 'content': 0.002121248049661517, 'timestamp': '2025-09-30 23:11:11.963926', 'step': 6474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.020130', 'step': 6474, 'epoch': 3} {'type': 'loss', 'content': 0.0005356451729312539, 'timestamp': '2025-09-30 23:11:12.023336', 'step': 6475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.084725', 'step': 6475, 'epoch': 3} {'type': 'loss', 'content': 0.0005878439988009632, 'timestamp': '2025-09-30 23:11:12.091294', 'step': 6476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.157372', 'step': 6476, 'epoch': 3} {'type': 'loss', 'content': 0.0004059940984006971, 'timestamp': '2025-09-30 23:11:12.160339', 'step': 6477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.214747', 'step': 6477, 'epoch': 3} {'type': 'loss', 'content': 0.0002472669002600014, 'timestamp': '2025-09-30 23:11:12.221300', 'step': 6478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.287917', 'step': 6478, 'epoch': 3} {'type': 'loss', 'content': 0.0038844491355121136, 'timestamp': '2025-09-30 23:11:12.293818', 'step': 6479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.355192', 'step': 6479, 'epoch': 3} {'type': 'loss', 'content': 0.00018073881801683456, 'timestamp': '2025-09-30 23:11:12.362029', 'step': 6480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.418828', 'step': 6480, 'epoch': 3} {'type': 'loss', 'content': 0.000523461785633117, 'timestamp': '2025-09-30 23:11:12.422949', 'step': 6481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:12.481009', 'step': 6481, 'epoch': 3} {'type': 'loss', 'content': 0.0005444627022370696, 'timestamp': '2025-09-30 23:11:12.484292', 'step': 6482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.539891', 'step': 6482, 'epoch': 3} {'type': 'loss', 'content': 0.01966848410665989, 'timestamp': '2025-09-30 23:11:12.543156', 'step': 6483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:12.604310', 'step': 6483, 'epoch': 3} {'type': 'loss', 'content': 0.03693220391869545, 'timestamp': '2025-09-30 23:11:12.611131', 'step': 6484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.665692', 'step': 6484, 'epoch': 3} {'type': 'loss', 'content': 0.00027572191902436316, 'timestamp': '2025-09-30 23:11:12.669964', 'step': 6485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.724018', 'step': 6485, 'epoch': 3} {'type': 'loss', 'content': 0.0005697333253920078, 'timestamp': '2025-09-30 23:11:12.726976', 'step': 6486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.784092', 'step': 6486, 'epoch': 3} {'type': 'loss', 'content': 0.0037524180952459574, 'timestamp': '2025-09-30 23:11:12.786637', 'step': 6487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.840258', 'step': 6487, 'epoch': 3} {'type': 'loss', 'content': 0.0013594824122264981, 'timestamp': '2025-09-30 23:11:12.846473', 'step': 6488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.900162', 'step': 6488, 'epoch': 3} {'type': 'loss', 'content': 0.041015855967998505, 'timestamp': '2025-09-30 23:11:12.903164', 'step': 6489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:12.959585', 'step': 6489, 'epoch': 3} {'type': 'loss', 'content': 0.03983807563781738, 'timestamp': '2025-09-30 23:11:12.962531', 'step': 6490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.016100', 'step': 6490, 'epoch': 3} {'type': 'loss', 'content': 0.0008465908467769623, 'timestamp': '2025-09-30 23:11:13.021896', 'step': 6491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.077815', 'step': 6491, 'epoch': 3} {'type': 'loss', 'content': 0.0004082934174221009, 'timestamp': '2025-09-30 23:11:13.083842', 'step': 6492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.137367', 'step': 6492, 'epoch': 3} {'type': 'loss', 'content': 0.00027382641565054655, 'timestamp': '2025-09-30 23:11:13.139983', 'step': 6493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.195475', 'step': 6493, 'epoch': 3} {'type': 'loss', 'content': 0.0003915913694072515, 'timestamp': '2025-09-30 23:11:13.199035', 'step': 6494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:13.254665', 'step': 6494, 'epoch': 3} {'type': 'loss', 'content': 0.02404273860156536, 'timestamp': '2025-09-30 23:11:13.257504', 'step': 6495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.311935', 'step': 6495, 'epoch': 3} {'type': 'loss', 'content': 0.02991938777267933, 'timestamp': '2025-09-30 23:11:13.319129', 'step': 6496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.372702', 'step': 6496, 'epoch': 3} {'type': 'loss', 'content': 0.0009659515926614404, 'timestamp': '2025-09-30 23:11:13.375283', 'step': 6497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.431170', 'step': 6497, 'epoch': 3} {'type': 'loss', 'content': 0.002940561156719923, 'timestamp': '2025-09-30 23:11:13.434464', 'step': 6498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:13.488749', 'step': 6498, 'epoch': 3} {'type': 'loss', 'content': 0.002499025547876954, 'timestamp': '2025-09-30 23:11:13.494753', 'step': 6499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:13.557230', 'step': 6499, 'epoch': 3} {'type': 'loss', 'content': 0.07637779414653778, 'timestamp': '2025-09-30 23:11:13.563600', 'step': 6500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-30 23:11:13.962360', 'step': 6500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.026897', 'step': 6500, 'epoch': 3} {'type': 'loss', 'content': 0.007010913919657469, 'timestamp': '2025-09-30 23:11:14.029445', 'step': 6501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.094162', 'step': 6501, 'epoch': 3} {'type': 'loss', 'content': 0.0004749407817143947, 'timestamp': '2025-09-30 23:11:14.097015', 'step': 6502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.156373', 'step': 6502, 'epoch': 3} {'type': 'loss', 'content': 0.0004490081628318876, 'timestamp': '2025-09-30 23:11:14.159362', 'step': 6503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.214265', 'step': 6503, 'epoch': 3} {'type': 'loss', 'content': 0.0005751705029979348, 'timestamp': '2025-09-30 23:11:14.221247', 'step': 6504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.278878', 'step': 6504, 'epoch': 3} {'type': 'loss', 'content': 0.0005181870074011385, 'timestamp': '2025-09-30 23:11:14.281178', 'step': 6505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.341200', 'step': 6505, 'epoch': 3} {'type': 'loss', 'content': 0.0028219781816005707, 'timestamp': '2025-09-30 23:11:14.344287', 'step': 6506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:14.398608', 'step': 6506, 'epoch': 3} {'type': 'loss', 'content': 0.00015953842375893146, 'timestamp': '2025-09-30 23:11:14.406719', 'step': 6507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.472547', 'step': 6507, 'epoch': 3} {'type': 'loss', 'content': 0.0013026943197473884, 'timestamp': '2025-09-30 23:11:14.482291', 'step': 6508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.544781', 'step': 6508, 'epoch': 3} {'type': 'loss', 'content': 0.012982814572751522, 'timestamp': '2025-09-30 23:11:14.547390', 'step': 6509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.604506', 'step': 6509, 'epoch': 3} {'type': 'loss', 'content': 0.0009776154765859246, 'timestamp': '2025-09-30 23:11:14.607098', 'step': 6510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.661697', 'step': 6510, 'epoch': 3} {'type': 'loss', 'content': 0.0029034498147666454, 'timestamp': '2025-09-30 23:11:14.665873', 'step': 6511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.722831', 'step': 6511, 'epoch': 3} {'type': 'loss', 'content': 0.0004366580687928945, 'timestamp': '2025-09-30 23:11:14.729397', 'step': 6512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.784075', 'step': 6512, 'epoch': 3} {'type': 'loss', 'content': 0.00034249696182087064, 'timestamp': '2025-09-30 23:11:14.786409', 'step': 6513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.840913', 'step': 6513, 'epoch': 3} {'type': 'loss', 'content': 0.0034590756986290216, 'timestamp': '2025-09-30 23:11:14.846068', 'step': 6514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.906783', 'step': 6514, 'epoch': 3} {'type': 'loss', 'content': 0.0029521717224270105, 'timestamp': '2025-09-30 23:11:14.922177', 'step': 6515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:14.981852', 'step': 6515, 'epoch': 3} {'type': 'loss', 'content': 0.001369710429571569, 'timestamp': '2025-09-30 23:11:14.989045', 'step': 6516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.047115', 'step': 6516, 'epoch': 3} {'type': 'loss', 'content': 0.0005810525617562234, 'timestamp': '2025-09-30 23:11:15.050684', 'step': 6517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:15.106856', 'step': 6517, 'epoch': 3} {'type': 'loss', 'content': 0.0005703270435333252, 'timestamp': '2025-09-30 23:11:15.109406', 'step': 6518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.164657', 'step': 6518, 'epoch': 3} {'type': 'loss', 'content': 0.0004619820392690599, 'timestamp': '2025-09-30 23:11:15.169310', 'step': 6519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.232016', 'step': 6519, 'epoch': 3} {'type': 'loss', 'content': 0.02152485027909279, 'timestamp': '2025-09-30 23:11:15.238834', 'step': 6520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.307465', 'step': 6520, 'epoch': 3} {'type': 'loss', 'content': 0.0018235733732581139, 'timestamp': '2025-09-30 23:11:15.309292', 'step': 6521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.365029', 'step': 6521, 'epoch': 3} {'type': 'loss', 'content': 0.011570760048925877, 'timestamp': '2025-09-30 23:11:15.368928', 'step': 6522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.425186', 'step': 6522, 'epoch': 3} {'type': 'loss', 'content': 0.0018484058091416955, 'timestamp': '2025-09-30 23:11:15.431743', 'step': 6523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.494305', 'step': 6523, 'epoch': 3} {'type': 'loss', 'content': 0.00045300257625058293, 'timestamp': '2025-09-30 23:11:15.506148', 'step': 6524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.565438', 'step': 6524, 'epoch': 3} {'type': 'loss', 'content': 0.0021244788076728582, 'timestamp': '2025-09-30 23:11:15.569788', 'step': 6525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:15.628131', 'step': 6525, 'epoch': 3} {'type': 'loss', 'content': 0.0006139161996543407, 'timestamp': '2025-09-30 23:11:15.632380', 'step': 6526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.689628', 'step': 6526, 'epoch': 3} {'type': 'loss', 'content': 0.000621538725681603, 'timestamp': '2025-09-30 23:11:15.693463', 'step': 6527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.749639', 'step': 6527, 'epoch': 3} {'type': 'loss', 'content': 0.013511588796973228, 'timestamp': '2025-09-30 23:11:15.757714', 'step': 6528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.819550', 'step': 6528, 'epoch': 3} {'type': 'loss', 'content': 0.006119123660027981, 'timestamp': '2025-09-30 23:11:15.822400', 'step': 6529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:15.882758', 'step': 6529, 'epoch': 3} {'type': 'loss', 'content': 0.0005174625548534095, 'timestamp': '2025-09-30 23:11:15.886000', 'step': 6530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:15.947238', 'step': 6530, 'epoch': 3} {'type': 'loss', 'content': 0.022361459210515022, 'timestamp': '2025-09-30 23:11:15.951349', 'step': 6531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:16.017830', 'step': 6531, 'epoch': 3} {'type': 'loss', 'content': 0.025810379534959793, 'timestamp': '2025-09-30 23:11:16.028368', 'step': 6532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:16.090293', 'step': 6532, 'epoch': 3} {'type': 'loss', 'content': 0.005324683152139187, 'timestamp': '2025-09-30 23:11:16.097761', 'step': 6533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:16.180647', 'step': 6533, 'epoch': 3} {'type': 'loss', 'content': 0.0001598435774212703, 'timestamp': '2025-09-30 23:11:16.184549', 'step': 6534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:16.240358', 'step': 6534, 'epoch': 3} {'type': 'loss', 'content': 0.0008334267768077552, 'timestamp': '2025-09-30 23:11:16.243676', 'step': 6535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:16.300622', 'step': 6535, 'epoch': 3} {'type': 'loss', 'content': 0.033107344061136246, 'timestamp': '2025-09-30 23:11:16.306465', 'step': 6536, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:11:20.379004', 'step': 6536, 'epoch': 3} {'type': 'pplx', 'content': 6991139.369358495, 'timestamp': '2025-09-30 23:11:20.382754', 'step': 6536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:20.435496', 'step': 6536, 'epoch': 3} {'type': 'loss', 'content': 0.00027980920276604593, 'timestamp': '2025-09-30 23:11:20.438055', 'step': 6537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:20.493231', 'step': 6537, 'epoch': 3} {'type': 'loss', 'content': 0.02865399420261383, 'timestamp': '2025-09-30 23:11:20.498403', 'step': 6538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:20.562130', 'step': 6538, 'epoch': 3} {'type': 'loss', 'content': 0.003367604222148657, 'timestamp': '2025-09-30 23:11:20.567888', 'step': 6539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:20.629015', 'step': 6539, 'epoch': 3} {'type': 'loss', 'content': 0.0010997195495292544, 'timestamp': '2025-09-30 23:11:20.636851', 'step': 6540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:20.701753', 'step': 6540, 'epoch': 3} {'type': 'loss', 'content': 0.00023795886954758316, 'timestamp': '2025-09-30 23:11:20.708769', 'step': 6541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:20.774372', 'step': 6541, 'epoch': 3} {'type': 'loss', 'content': 0.02194744534790516, 'timestamp': '2025-09-30 23:11:20.777905', 'step': 6542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:20.836599', 'step': 6542, 'epoch': 3} {'type': 'loss', 'content': 0.00032805712544359267, 'timestamp': '2025-09-30 23:11:20.844031', 'step': 6543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:20.901908', 'step': 6543, 'epoch': 3} {'type': 'loss', 'content': 0.013906469568610191, 'timestamp': '2025-09-30 23:11:20.908469', 'step': 6544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:20.963048', 'step': 6544, 'epoch': 3} {'type': 'loss', 'content': 0.02123412862420082, 'timestamp': '2025-09-30 23:11:20.966442', 'step': 6545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.020480', 'step': 6545, 'epoch': 3} {'type': 'loss', 'content': 0.004330258816480637, 'timestamp': '2025-09-30 23:11:21.023491', 'step': 6546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:21.079919', 'step': 6546, 'epoch': 3} {'type': 'loss', 'content': 0.004253110848367214, 'timestamp': '2025-09-30 23:11:21.082110', 'step': 6547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.142920', 'step': 6547, 'epoch': 3} {'type': 'loss', 'content': 0.0011574802920222282, 'timestamp': '2025-09-30 23:11:21.156440', 'step': 6548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.217602', 'step': 6548, 'epoch': 3} {'type': 'loss', 'content': 0.0008437121869064867, 'timestamp': '2025-09-30 23:11:21.221474', 'step': 6549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.277878', 'step': 6549, 'epoch': 3} {'type': 'loss', 'content': 0.00446917861700058, 'timestamp': '2025-09-30 23:11:21.281869', 'step': 6550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.337968', 'step': 6550, 'epoch': 3} {'type': 'loss', 'content': 0.005194136407226324, 'timestamp': '2025-09-30 23:11:21.342469', 'step': 6551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:21.401061', 'step': 6551, 'epoch': 3} {'type': 'loss', 'content': 0.011735603213310242, 'timestamp': '2025-09-30 23:11:21.407166', 'step': 6552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.461108', 'step': 6552, 'epoch': 3} {'type': 'loss', 'content': 0.022723516449332237, 'timestamp': '2025-09-30 23:11:21.464336', 'step': 6553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:21.518619', 'step': 6553, 'epoch': 3} {'type': 'loss', 'content': 0.00034845960908569396, 'timestamp': '2025-09-30 23:11:21.522125', 'step': 6554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.590198', 'step': 6554, 'epoch': 3} {'type': 'loss', 'content': 0.0178252924233675, 'timestamp': '2025-09-30 23:11:21.598125', 'step': 6555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.664066', 'step': 6555, 'epoch': 3} {'type': 'loss', 'content': 0.00020702133770100772, 'timestamp': '2025-09-30 23:11:21.671085', 'step': 6556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.727172', 'step': 6556, 'epoch': 3} {'type': 'loss', 'content': 0.002935559954494238, 'timestamp': '2025-09-30 23:11:21.734308', 'step': 6557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.792360', 'step': 6557, 'epoch': 3} {'type': 'loss', 'content': 0.00019519399211276323, 'timestamp': '2025-09-30 23:11:21.796043', 'step': 6558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.856576', 'step': 6558, 'epoch': 3} {'type': 'loss', 'content': 0.002923874882981181, 'timestamp': '2025-09-30 23:11:21.858521', 'step': 6559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:21.916981', 'step': 6559, 'epoch': 3} {'type': 'loss', 'content': 0.004368710797280073, 'timestamp': '2025-09-30 23:11:21.924933', 'step': 6560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:21.980075', 'step': 6560, 'epoch': 3} {'type': 'loss', 'content': 0.008036322891712189, 'timestamp': '2025-09-30 23:11:21.983439', 'step': 6561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:22.040232', 'step': 6561, 'epoch': 3} {'type': 'loss', 'content': 0.010530136525630951, 'timestamp': '2025-09-30 23:11:22.043716', 'step': 6562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.103773', 'step': 6562, 'epoch': 3} {'type': 'loss', 'content': 0.005546038504689932, 'timestamp': '2025-09-30 23:11:22.111883', 'step': 6563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.177061', 'step': 6563, 'epoch': 3} {'type': 'loss', 'content': 0.057362813502550125, 'timestamp': '2025-09-30 23:11:22.185705', 'step': 6564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:22.247616', 'step': 6564, 'epoch': 3} {'type': 'loss', 'content': 0.014544329605996609, 'timestamp': '2025-09-30 23:11:22.251536', 'step': 6565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.309889', 'step': 6565, 'epoch': 3} {'type': 'loss', 'content': 0.0003852912050206214, 'timestamp': '2025-09-30 23:11:22.312391', 'step': 6566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.373040', 'step': 6566, 'epoch': 3} {'type': 'loss', 'content': 0.017205096781253815, 'timestamp': '2025-09-30 23:11:22.377602', 'step': 6567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:22.440022', 'step': 6567, 'epoch': 3} {'type': 'loss', 'content': 0.026055069640278816, 'timestamp': '2025-09-30 23:11:22.446791', 'step': 6568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:22.510739', 'step': 6568, 'epoch': 3} {'type': 'loss', 'content': 0.001918025896884501, 'timestamp': '2025-09-30 23:11:22.515864', 'step': 6569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.575724', 'step': 6569, 'epoch': 3} {'type': 'loss', 'content': 0.024975065141916275, 'timestamp': '2025-09-30 23:11:22.582363', 'step': 6570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.649868', 'step': 6570, 'epoch': 3} {'type': 'loss', 'content': 0.0026141684502363205, 'timestamp': '2025-09-30 23:11:22.653163', 'step': 6571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.720725', 'step': 6571, 'epoch': 3} {'type': 'loss', 'content': 0.008939219638705254, 'timestamp': '2025-09-30 23:11:22.729615', 'step': 6572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.786605', 'step': 6572, 'epoch': 3} {'type': 'loss', 'content': 0.002610257128253579, 'timestamp': '2025-09-30 23:11:22.790615', 'step': 6573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.848486', 'step': 6573, 'epoch': 3} {'type': 'loss', 'content': 0.00036180720780976117, 'timestamp': '2025-09-30 23:11:22.853940', 'step': 6574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.912621', 'step': 6574, 'epoch': 3} {'type': 'loss', 'content': 0.02777162566781044, 'timestamp': '2025-09-30 23:11:22.917072', 'step': 6575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:22.972534', 'step': 6575, 'epoch': 3} {'type': 'loss', 'content': 0.004814261104911566, 'timestamp': '2025-09-30 23:11:22.979689', 'step': 6576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.036773', 'step': 6576, 'epoch': 3} {'type': 'loss', 'content': 0.0054606287740170956, 'timestamp': '2025-09-30 23:11:23.039566', 'step': 6577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.103222', 'step': 6577, 'epoch': 3} {'type': 'loss', 'content': 0.002316756872460246, 'timestamp': '2025-09-30 23:11:23.106863', 'step': 6578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:23.163547', 'step': 6578, 'epoch': 3} {'type': 'loss', 'content': 0.004862907342612743, 'timestamp': '2025-09-30 23:11:23.168612', 'step': 6579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.226528', 'step': 6579, 'epoch': 3} {'type': 'loss', 'content': 0.00024302709789481014, 'timestamp': '2025-09-30 23:11:23.234136', 'step': 6580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.294237', 'step': 6580, 'epoch': 3} {'type': 'loss', 'content': 0.0008498784736730158, 'timestamp': '2025-09-30 23:11:23.296508', 'step': 6581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:23.353018', 'step': 6581, 'epoch': 3} {'type': 'loss', 'content': 0.0037251696921885014, 'timestamp': '2025-09-30 23:11:23.357721', 'step': 6582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:23.415536', 'step': 6582, 'epoch': 3} {'type': 'loss', 'content': 0.03365372493863106, 'timestamp': '2025-09-30 23:11:23.419972', 'step': 6583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:23.476616', 'step': 6583, 'epoch': 3} {'type': 'loss', 'content': 0.001333780586719513, 'timestamp': '2025-09-30 23:11:23.482557', 'step': 6584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:23.540258', 'step': 6584, 'epoch': 3} {'type': 'loss', 'content': 0.003489213762804866, 'timestamp': '2025-09-30 23:11:23.543166', 'step': 6585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.599599', 'step': 6585, 'epoch': 3} {'type': 'loss', 'content': 0.009355329908430576, 'timestamp': '2025-09-30 23:11:23.602473', 'step': 6586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.666124', 'step': 6586, 'epoch': 3} {'type': 'loss', 'content': 0.0042180512100458145, 'timestamp': '2025-09-30 23:11:23.673337', 'step': 6587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.733242', 'step': 6587, 'epoch': 3} {'type': 'loss', 'content': 0.0008239491726271808, 'timestamp': '2025-09-30 23:11:23.741564', 'step': 6588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:23.803199', 'step': 6588, 'epoch': 3} {'type': 'loss', 'content': 0.0018448374466970563, 'timestamp': '2025-09-30 23:11:23.806602', 'step': 6589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.862543', 'step': 6589, 'epoch': 3} {'type': 'loss', 'content': 0.0028587684500962496, 'timestamp': '2025-09-30 23:11:23.866368', 'step': 6590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.925123', 'step': 6590, 'epoch': 3} {'type': 'loss', 'content': 0.012076827697455883, 'timestamp': '2025-09-30 23:11:23.927848', 'step': 6591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:23.983426', 'step': 6591, 'epoch': 3} {'type': 'loss', 'content': 0.04182480648159981, 'timestamp': '2025-09-30 23:11:23.990739', 'step': 6592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:24.045917', 'step': 6592, 'epoch': 3} {'type': 'loss', 'content': 0.0012419292470440269, 'timestamp': '2025-09-30 23:11:24.049324', 'step': 6593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:24.109889', 'step': 6593, 'epoch': 3} {'type': 'loss', 'content': 0.0021593733690679073, 'timestamp': '2025-09-30 23:11:24.120391', 'step': 6594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:24.199351', 'step': 6594, 'epoch': 3} {'type': 'loss', 'content': 0.0011677785078063607, 'timestamp': '2025-09-30 23:11:24.203520', 'step': 6595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:24.305056', 'step': 6595, 'epoch': 3} {'type': 'loss', 'content': 0.003926522564142942, 'timestamp': '2025-09-30 23:11:24.324985', 'step': 6596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:24.425111', 'step': 6596, 'epoch': 3} {'type': 'loss', 'content': 0.0004031125863548368, 'timestamp': '2025-09-30 23:11:24.434754', 'step': 6597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:24.518388', 'step': 6597, 'epoch': 3} {'type': 'loss', 'content': 0.016325604170560837, 'timestamp': '2025-09-30 23:11:24.537793', 'step': 6598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:24.630653', 'step': 6598, 'epoch': 3} {'type': 'loss', 'content': 0.001870187115855515, 'timestamp': '2025-09-30 23:11:24.648056', 'step': 6599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:24.752969', 'step': 6599, 'epoch': 3} {'type': 'loss', 'content': 0.006281630136072636, 'timestamp': '2025-09-30 23:11:24.775033', 'step': 6600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:24.871178', 'step': 6600, 'epoch': 3} {'type': 'loss', 'content': 0.000688204716425389, 'timestamp': '2025-09-30 23:11:24.874887', 'step': 6601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:24.948635', 'step': 6601, 'epoch': 3} {'type': 'loss', 'content': 0.0014777762116864324, 'timestamp': '2025-09-30 23:11:24.954860', 'step': 6602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:25.017097', 'step': 6602, 'epoch': 3} {'type': 'loss', 'content': 0.001138614141382277, 'timestamp': '2025-09-30 23:11:25.020706', 'step': 6603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.097519', 'step': 6603, 'epoch': 3} {'type': 'loss', 'content': 0.0036743241362273693, 'timestamp': '2025-09-30 23:11:25.109903', 'step': 6604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:25.183090', 'step': 6604, 'epoch': 3} {'type': 'loss', 'content': 0.0032735986169427633, 'timestamp': '2025-09-30 23:11:25.187252', 'step': 6605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:25.245155', 'step': 6605, 'epoch': 3} {'type': 'loss', 'content': 0.0005494471988640726, 'timestamp': '2025-09-30 23:11:25.248526', 'step': 6606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.307316', 'step': 6606, 'epoch': 3} {'type': 'loss', 'content': 0.0032114891801029444, 'timestamp': '2025-09-30 23:11:25.315540', 'step': 6607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.387808', 'step': 6607, 'epoch': 3} {'type': 'loss', 'content': 0.0003706454881466925, 'timestamp': '2025-09-30 23:11:25.397369', 'step': 6608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:25.455870', 'step': 6608, 'epoch': 3} {'type': 'loss', 'content': 0.0010714821983128786, 'timestamp': '2025-09-30 23:11:25.460137', 'step': 6609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.520891', 'step': 6609, 'epoch': 3} {'type': 'loss', 'content': 0.01866901107132435, 'timestamp': '2025-09-30 23:11:25.527437', 'step': 6610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.592898', 'step': 6610, 'epoch': 3} {'type': 'loss', 'content': 0.0008354093297384679, 'timestamp': '2025-09-30 23:11:25.597166', 'step': 6611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.658299', 'step': 6611, 'epoch': 3} {'type': 'loss', 'content': 0.00033476995304226875, 'timestamp': '2025-09-30 23:11:25.664660', 'step': 6612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:25.732033', 'step': 6612, 'epoch': 3} {'type': 'loss', 'content': 0.0010793808614835143, 'timestamp': '2025-09-30 23:11:25.734604', 'step': 6613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.796915', 'step': 6613, 'epoch': 3} {'type': 'loss', 'content': 0.001870637061074376, 'timestamp': '2025-09-30 23:11:25.802674', 'step': 6614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.861262', 'step': 6614, 'epoch': 3} {'type': 'loss', 'content': 0.00030791937024332583, 'timestamp': '2025-09-30 23:11:25.864434', 'step': 6615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.920248', 'step': 6615, 'epoch': 3} {'type': 'loss', 'content': 0.0006205198587849736, 'timestamp': '2025-09-30 23:11:25.927725', 'step': 6616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:25.982915', 'step': 6616, 'epoch': 3} {'type': 'loss', 'content': 0.0005126795149408281, 'timestamp': '2025-09-30 23:11:25.990188', 'step': 6617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.051232', 'step': 6617, 'epoch': 3} {'type': 'loss', 'content': 0.00011617562995525077, 'timestamp': '2025-09-30 23:11:26.060561', 'step': 6618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.135659', 'step': 6618, 'epoch': 3} {'type': 'loss', 'content': 0.0013974476605653763, 'timestamp': '2025-09-30 23:11:26.141406', 'step': 6619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.215491', 'step': 6619, 'epoch': 3} {'type': 'loss', 'content': 0.03420665115118027, 'timestamp': '2025-09-30 23:11:26.221395', 'step': 6620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.283601', 'step': 6620, 'epoch': 3} {'type': 'loss', 'content': 0.00017319558537565172, 'timestamp': '2025-09-30 23:11:26.287995', 'step': 6621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.347803', 'step': 6621, 'epoch': 3} {'type': 'loss', 'content': 0.00036509771598502994, 'timestamp': '2025-09-30 23:11:26.350472', 'step': 6622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.411065', 'step': 6622, 'epoch': 3} {'type': 'loss', 'content': 0.0010960304643958807, 'timestamp': '2025-09-30 23:11:26.414237', 'step': 6623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:26.470729', 'step': 6623, 'epoch': 3} {'type': 'loss', 'content': 0.0011853101896122098, 'timestamp': '2025-09-30 23:11:26.480016', 'step': 6624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.538692', 'step': 6624, 'epoch': 3} {'type': 'loss', 'content': 0.00043144176015630364, 'timestamp': '2025-09-30 23:11:26.542308', 'step': 6625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.597894', 'step': 6625, 'epoch': 3} {'type': 'loss', 'content': 0.0009220215724781156, 'timestamp': '2025-09-30 23:11:26.601188', 'step': 6626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.655556', 'step': 6626, 'epoch': 3} {'type': 'loss', 'content': 0.01429868582636118, 'timestamp': '2025-09-30 23:11:26.662465', 'step': 6627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.728901', 'step': 6627, 'epoch': 3} {'type': 'loss', 'content': 0.0016511686844751239, 'timestamp': '2025-09-30 23:11:26.740817', 'step': 6628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.808197', 'step': 6628, 'epoch': 3} {'type': 'loss', 'content': 0.0022447991650551558, 'timestamp': '2025-09-30 23:11:26.812521', 'step': 6629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:26.880111', 'step': 6629, 'epoch': 3} {'type': 'loss', 'content': 0.0004200223775114864, 'timestamp': '2025-09-30 23:11:26.888719', 'step': 6630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:26.951580', 'step': 6630, 'epoch': 3} {'type': 'loss', 'content': 0.00020721254986710846, 'timestamp': '2025-09-30 23:11:26.954397', 'step': 6631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.016483', 'step': 6631, 'epoch': 3} {'type': 'loss', 'content': 0.004339511506259441, 'timestamp': '2025-09-30 23:11:27.027270', 'step': 6632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.091265', 'step': 6632, 'epoch': 3} {'type': 'loss', 'content': 0.0006867131451144814, 'timestamp': '2025-09-30 23:11:27.099805', 'step': 6633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.163258', 'step': 6633, 'epoch': 3} {'type': 'loss', 'content': 0.0003496904100757092, 'timestamp': '2025-09-30 23:11:27.166187', 'step': 6634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.222905', 'step': 6634, 'epoch': 3} {'type': 'loss', 'content': 0.0008265222422778606, 'timestamp': '2025-09-30 23:11:27.229756', 'step': 6635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.296993', 'step': 6635, 'epoch': 3} {'type': 'loss', 'content': 0.012033904902637005, 'timestamp': '2025-09-30 23:11:27.308927', 'step': 6636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.375306', 'step': 6636, 'epoch': 3} {'type': 'loss', 'content': 0.00017027862486429513, 'timestamp': '2025-09-30 23:11:27.379762', 'step': 6637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:27.446466', 'step': 6637, 'epoch': 3} {'type': 'loss', 'content': 8.940533734858036e-05, 'timestamp': '2025-09-30 23:11:27.449512', 'step': 6638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.513287', 'step': 6638, 'epoch': 3} {'type': 'loss', 'content': 0.001429613446816802, 'timestamp': '2025-09-30 23:11:27.521779', 'step': 6639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.593790', 'step': 6639, 'epoch': 3} {'type': 'loss', 'content': 0.0003254961338825524, 'timestamp': '2025-09-30 23:11:27.599791', 'step': 6640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.667368', 'step': 6640, 'epoch': 3} {'type': 'loss', 'content': 0.0005772101576440036, 'timestamp': '2025-09-30 23:11:27.669690', 'step': 6641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.737797', 'step': 6641, 'epoch': 3} {'type': 'loss', 'content': 0.0005424198461696506, 'timestamp': '2025-09-30 23:11:27.747501', 'step': 6642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.828626', 'step': 6642, 'epoch': 3} {'type': 'loss', 'content': 0.0001878004113677889, 'timestamp': '2025-09-30 23:11:27.831326', 'step': 6643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.904435', 'step': 6643, 'epoch': 3} {'type': 'loss', 'content': 0.0006912358803674579, 'timestamp': '2025-09-30 23:11:27.913942', 'step': 6644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:27.990847', 'step': 6644, 'epoch': 3} {'type': 'loss', 'content': 0.002110767178237438, 'timestamp': '2025-09-30 23:11:27.998950', 'step': 6645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.062805', 'step': 6645, 'epoch': 3} {'type': 'loss', 'content': 0.00041522126412019134, 'timestamp': '2025-09-30 23:11:28.065846', 'step': 6646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.134151', 'step': 6646, 'epoch': 3} {'type': 'loss', 'content': 0.0008692844421602786, 'timestamp': '2025-09-30 23:11:28.143481', 'step': 6647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:28.214807', 'step': 6647, 'epoch': 3} {'type': 'loss', 'content': 0.000215913139982149, 'timestamp': '2025-09-30 23:11:28.222000', 'step': 6648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.283687', 'step': 6648, 'epoch': 3} {'type': 'loss', 'content': 0.0006932065007276833, 'timestamp': '2025-09-30 23:11:28.291784', 'step': 6649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.364603', 'step': 6649, 'epoch': 3} {'type': 'loss', 'content': 0.0007445593364536762, 'timestamp': '2025-09-30 23:11:28.370910', 'step': 6650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.437843', 'step': 6650, 'epoch': 3} {'type': 'loss', 'content': 0.0004506034019868821, 'timestamp': '2025-09-30 23:11:28.445535', 'step': 6651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.511236', 'step': 6651, 'epoch': 3} {'type': 'loss', 'content': 0.015497148968279362, 'timestamp': '2025-09-30 23:11:28.517812', 'step': 6652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.579542', 'step': 6652, 'epoch': 3} {'type': 'loss', 'content': 0.00017829284479375929, 'timestamp': '2025-09-30 23:11:28.586793', 'step': 6653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:28.655658', 'step': 6653, 'epoch': 3} {'type': 'loss', 'content': 0.00016741559375077486, 'timestamp': '2025-09-30 23:11:28.659018', 'step': 6654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.726328', 'step': 6654, 'epoch': 3} {'type': 'loss', 'content': 0.0003210493887308985, 'timestamp': '2025-09-30 23:11:28.733009', 'step': 6655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.791113', 'step': 6655, 'epoch': 3} {'type': 'loss', 'content': 8.688640082255006e-05, 'timestamp': '2025-09-30 23:11:28.798858', 'step': 6656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:28.853896', 'step': 6656, 'epoch': 3} {'type': 'loss', 'content': 0.0003130044788122177, 'timestamp': '2025-09-30 23:11:28.859413', 'step': 6657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:28.923998', 'step': 6657, 'epoch': 3} {'type': 'loss', 'content': 0.000234150342294015, 'timestamp': '2025-09-30 23:11:28.926949', 'step': 6658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:28.989693', 'step': 6658, 'epoch': 3} {'type': 'loss', 'content': 3.969268800574355e-05, 'timestamp': '2025-09-30 23:11:28.993823', 'step': 6659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.050358', 'step': 6659, 'epoch': 3} {'type': 'loss', 'content': 6.167282845126465e-05, 'timestamp': '2025-09-30 23:11:29.057430', 'step': 6660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.115032', 'step': 6660, 'epoch': 3} {'type': 'loss', 'content': 7.300781726371497e-05, 'timestamp': '2025-09-30 23:11:29.118672', 'step': 6661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:29.182269', 'step': 6661, 'epoch': 3} {'type': 'loss', 'content': 0.0004406773077789694, 'timestamp': '2025-09-30 23:11:29.188785', 'step': 6662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.249446', 'step': 6662, 'epoch': 3} {'type': 'loss', 'content': 0.0007851775735616684, 'timestamp': '2025-09-30 23:11:29.252490', 'step': 6663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:29.309707', 'step': 6663, 'epoch': 3} {'type': 'loss', 'content': 5.7166875194525346e-05, 'timestamp': '2025-09-30 23:11:29.317409', 'step': 6664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.378731', 'step': 6664, 'epoch': 3} {'type': 'loss', 'content': 0.005096358712762594, 'timestamp': '2025-09-30 23:11:29.382381', 'step': 6665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.444702', 'step': 6665, 'epoch': 3} {'type': 'loss', 'content': 0.0006414343952201307, 'timestamp': '2025-09-30 23:11:29.454434', 'step': 6666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.524550', 'step': 6666, 'epoch': 3} {'type': 'loss', 'content': 0.00038074699114076793, 'timestamp': '2025-09-30 23:11:29.528356', 'step': 6667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.603444', 'step': 6667, 'epoch': 3} {'type': 'loss', 'content': 0.0007953996537253261, 'timestamp': '2025-09-30 23:11:29.615972', 'step': 6668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.684611', 'step': 6668, 'epoch': 3} {'type': 'loss', 'content': 0.00032657082192599773, 'timestamp': '2025-09-30 23:11:29.687974', 'step': 6669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.747402', 'step': 6669, 'epoch': 3} {'type': 'loss', 'content': 0.00010839151218533516, 'timestamp': '2025-09-30 23:11:29.751024', 'step': 6670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:29.823142', 'step': 6670, 'epoch': 3} {'type': 'loss', 'content': 0.00033237357274629176, 'timestamp': '2025-09-30 23:11:29.826337', 'step': 6671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.891746', 'step': 6671, 'epoch': 3} {'type': 'loss', 'content': 0.0008879908127710223, 'timestamp': '2025-09-30 23:11:29.898954', 'step': 6672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:29.955880', 'step': 6672, 'epoch': 3} {'type': 'loss', 'content': 0.005533948075026274, 'timestamp': '2025-09-30 23:11:29.960989', 'step': 6673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.031730', 'step': 6673, 'epoch': 3} {'type': 'loss', 'content': 0.00016211574256885797, 'timestamp': '2025-09-30 23:11:30.041530', 'step': 6674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.105158', 'step': 6674, 'epoch': 3} {'type': 'loss', 'content': 0.0003841350262518972, 'timestamp': '2025-09-30 23:11:30.110198', 'step': 6675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:30.169058', 'step': 6675, 'epoch': 3} {'type': 'loss', 'content': 0.0041551049798727036, 'timestamp': '2025-09-30 23:11:30.176816', 'step': 6676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:30.240228', 'step': 6676, 'epoch': 3} {'type': 'loss', 'content': 0.022528523579239845, 'timestamp': '2025-09-30 23:11:30.244097', 'step': 6677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.303082', 'step': 6677, 'epoch': 3} {'type': 'loss', 'content': 5.956885433988646e-05, 'timestamp': '2025-09-30 23:11:30.318871', 'step': 6678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.385189', 'step': 6678, 'epoch': 3} {'type': 'loss', 'content': 0.0001372249098494649, 'timestamp': '2025-09-30 23:11:30.388521', 'step': 6679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.443912', 'step': 6679, 'epoch': 3} {'type': 'loss', 'content': 0.052596934139728546, 'timestamp': '2025-09-30 23:11:30.451991', 'step': 6680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.506531', 'step': 6680, 'epoch': 3} {'type': 'loss', 'content': 0.01355945598334074, 'timestamp': '2025-09-30 23:11:30.509299', 'step': 6681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.563805', 'step': 6681, 'epoch': 3} {'type': 'loss', 'content': 0.00012412841897457838, 'timestamp': '2025-09-30 23:11:30.571003', 'step': 6682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.627896', 'step': 6682, 'epoch': 3} {'type': 'loss', 'content': 0.001433253288269043, 'timestamp': '2025-09-30 23:11:30.635179', 'step': 6683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.700565', 'step': 6683, 'epoch': 3} {'type': 'loss', 'content': 0.00012605678057298064, 'timestamp': '2025-09-30 23:11:30.706617', 'step': 6684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.763052', 'step': 6684, 'epoch': 3} {'type': 'loss', 'content': 0.00018972261750604957, 'timestamp': '2025-09-30 23:11:30.766736', 'step': 6685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.828268', 'step': 6685, 'epoch': 3} {'type': 'loss', 'content': 0.0012044628383591771, 'timestamp': '2025-09-30 23:11:30.831903', 'step': 6686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.890020', 'step': 6686, 'epoch': 3} {'type': 'loss', 'content': 0.00014251044194679707, 'timestamp': '2025-09-30 23:11:30.893214', 'step': 6687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:30.948338', 'step': 6687, 'epoch': 3} {'type': 'loss', 'content': 0.02878803201019764, 'timestamp': '2025-09-30 23:11:30.955697', 'step': 6688, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:11:35.189201', 'step': 6688, 'epoch': 3} {'type': 'pplx', 'content': 9298546.294962628, 'timestamp': '2025-09-30 23:11:35.192776', 'step': 6688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:35.257024', 'step': 6688, 'epoch': 3} {'type': 'loss', 'content': 0.02493237890303135, 'timestamp': '2025-09-30 23:11:35.263484', 'step': 6689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:35.337782', 'step': 6689, 'epoch': 3} {'type': 'loss', 'content': 0.009786057285964489, 'timestamp': '2025-09-30 23:11:35.344846', 'step': 6690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:35.420332', 'step': 6690, 'epoch': 3} {'type': 'loss', 'content': 0.00016950278950389475, 'timestamp': '2025-09-30 23:11:35.426932', 'step': 6691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:35.498411', 'step': 6691, 'epoch': 3} {'type': 'loss', 'content': 4.4588767195818946e-05, 'timestamp': '2025-09-30 23:11:35.510129', 'step': 6692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:35.584543', 'step': 6692, 'epoch': 3} {'type': 'loss', 'content': 0.0002896938822232187, 'timestamp': '2025-09-30 23:11:35.587377', 'step': 6693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:35.659558', 'step': 6693, 'epoch': 3} {'type': 'loss', 'content': 0.0008094012155197561, 'timestamp': '2025-09-30 23:11:35.667080', 'step': 6694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:35.731033', 'step': 6694, 'epoch': 3} {'type': 'loss', 'content': 0.01822427473962307, 'timestamp': '2025-09-30 23:11:35.738131', 'step': 6695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:35.804302', 'step': 6695, 'epoch': 3} {'type': 'loss', 'content': 0.0009018058190122247, 'timestamp': '2025-09-30 23:11:35.811696', 'step': 6696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:35.881095', 'step': 6696, 'epoch': 3} {'type': 'loss', 'content': 0.0010525175603106618, 'timestamp': '2025-09-30 23:11:35.883885', 'step': 6697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:35.955941', 'step': 6697, 'epoch': 3} {'type': 'loss', 'content': 0.0002564824535511434, 'timestamp': '2025-09-30 23:11:35.959249', 'step': 6698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.024702', 'step': 6698, 'epoch': 3} {'type': 'loss', 'content': 0.00807518232613802, 'timestamp': '2025-09-30 23:11:36.028074', 'step': 6699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.097729', 'step': 6699, 'epoch': 3} {'type': 'loss', 'content': 0.00015680970682296902, 'timestamp': '2025-09-30 23:11:36.108269', 'step': 6700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.173198', 'step': 6700, 'epoch': 3} {'type': 'loss', 'content': 0.0027430220507085323, 'timestamp': '2025-09-30 23:11:36.177005', 'step': 6701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.250937', 'step': 6701, 'epoch': 3} {'type': 'loss', 'content': 0.01802058517932892, 'timestamp': '2025-09-30 23:11:36.258796', 'step': 6702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.330801', 'step': 6702, 'epoch': 3} {'type': 'loss', 'content': 0.00014425817062146962, 'timestamp': '2025-09-30 23:11:36.338387', 'step': 6703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.412343', 'step': 6703, 'epoch': 3} {'type': 'loss', 'content': 0.0005790383438579738, 'timestamp': '2025-09-30 23:11:36.421692', 'step': 6704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.486998', 'step': 6704, 'epoch': 3} {'type': 'loss', 'content': 0.00225987215526402, 'timestamp': '2025-09-30 23:11:36.493727', 'step': 6705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.553715', 'step': 6705, 'epoch': 3} {'type': 'loss', 'content': 9.134124411502853e-05, 'timestamp': '2025-09-30 23:11:36.566581', 'step': 6706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.646416', 'step': 6706, 'epoch': 3} {'type': 'loss', 'content': 0.0010368525981903076, 'timestamp': '2025-09-30 23:11:36.656531', 'step': 6707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.728852', 'step': 6707, 'epoch': 3} {'type': 'loss', 'content': 0.0006484551704488695, 'timestamp': '2025-09-30 23:11:36.739830', 'step': 6708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.797494', 'step': 6708, 'epoch': 3} {'type': 'loss', 'content': 0.00047603953862562776, 'timestamp': '2025-09-30 23:11:36.807818', 'step': 6709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.878434', 'step': 6709, 'epoch': 3} {'type': 'loss', 'content': 4.2247767851222306e-05, 'timestamp': '2025-09-30 23:11:36.881570', 'step': 6710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:36.949560', 'step': 6710, 'epoch': 3} {'type': 'loss', 'content': 0.03691757470369339, 'timestamp': '2025-09-30 23:11:36.953533', 'step': 6711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:37.012231', 'step': 6711, 'epoch': 3} {'type': 'loss', 'content': 0.0010575383203104138, 'timestamp': '2025-09-30 23:11:37.020646', 'step': 6712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.089411', 'step': 6712, 'epoch': 3} {'type': 'loss', 'content': 9.660228533903137e-05, 'timestamp': '2025-09-30 23:11:37.096138', 'step': 6713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:37.161688', 'step': 6713, 'epoch': 3} {'type': 'loss', 'content': 0.0015490499790757895, 'timestamp': '2025-09-30 23:11:37.164746', 'step': 6714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:37.234267', 'step': 6714, 'epoch': 3} {'type': 'loss', 'content': 0.00022213414194993675, 'timestamp': '2025-09-30 23:11:37.239579', 'step': 6715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:37.299855', 'step': 6715, 'epoch': 3} {'type': 'loss', 'content': 0.00020049375598318875, 'timestamp': '2025-09-30 23:11:37.306816', 'step': 6716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.371934', 'step': 6716, 'epoch': 3} {'type': 'loss', 'content': 7.982965325936675e-05, 'timestamp': '2025-09-30 23:11:37.379396', 'step': 6717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.441331', 'step': 6717, 'epoch': 3} {'type': 'loss', 'content': 0.00024254580785054713, 'timestamp': '2025-09-30 23:11:37.444305', 'step': 6718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.508343', 'step': 6718, 'epoch': 3} {'type': 'loss', 'content': 0.04476068541407585, 'timestamp': '2025-09-30 23:11:37.513593', 'step': 6719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.580148', 'step': 6719, 'epoch': 3} {'type': 'loss', 'content': 3.347303209011443e-05, 'timestamp': '2025-09-30 23:11:37.587625', 'step': 6720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:37.644766', 'step': 6720, 'epoch': 3} {'type': 'loss', 'content': 5.115993189974688e-05, 'timestamp': '2025-09-30 23:11:37.647660', 'step': 6721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:37.715581', 'step': 6721, 'epoch': 3} {'type': 'loss', 'content': 0.0004815535503439605, 'timestamp': '2025-09-30 23:11:37.720414', 'step': 6722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.783972', 'step': 6722, 'epoch': 3} {'type': 'loss', 'content': 0.009659717790782452, 'timestamp': '2025-09-30 23:11:37.790503', 'step': 6723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.857089', 'step': 6723, 'epoch': 3} {'type': 'loss', 'content': 0.00013860614853911102, 'timestamp': '2025-09-30 23:11:37.864671', 'step': 6724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.926383', 'step': 6724, 'epoch': 3} {'type': 'loss', 'content': 7.6989563240204e-05, 'timestamp': '2025-09-30 23:11:37.937057', 'step': 6725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:37.999073', 'step': 6725, 'epoch': 3} {'type': 'loss', 'content': 0.0002934793010354042, 'timestamp': '2025-09-30 23:11:38.002967', 'step': 6726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.062625', 'step': 6726, 'epoch': 3} {'type': 'loss', 'content': 0.00017192562518175691, 'timestamp': '2025-09-30 23:11:38.066507', 'step': 6727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.132065', 'step': 6727, 'epoch': 3} {'type': 'loss', 'content': 0.013630757108330727, 'timestamp': '2025-09-30 23:11:38.139338', 'step': 6728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.201629', 'step': 6728, 'epoch': 3} {'type': 'loss', 'content': 0.0009820127161219716, 'timestamp': '2025-09-30 23:11:38.203980', 'step': 6729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.258624', 'step': 6729, 'epoch': 3} {'type': 'loss', 'content': 0.0008390368893742561, 'timestamp': '2025-09-30 23:11:38.265052', 'step': 6730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.324475', 'step': 6730, 'epoch': 3} {'type': 'loss', 'content': 0.0059381588362157345, 'timestamp': '2025-09-30 23:11:38.328320', 'step': 6731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.386346', 'step': 6731, 'epoch': 3} {'type': 'loss', 'content': 0.0008120556594803929, 'timestamp': '2025-09-30 23:11:38.393274', 'step': 6732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.451052', 'step': 6732, 'epoch': 3} {'type': 'loss', 'content': 0.001139410655014217, 'timestamp': '2025-09-30 23:11:38.453774', 'step': 6733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.512875', 'step': 6733, 'epoch': 3} {'type': 'loss', 'content': 9.456386760575697e-05, 'timestamp': '2025-09-30 23:11:38.516863', 'step': 6734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.574477', 'step': 6734, 'epoch': 3} {'type': 'loss', 'content': 0.022858334705233574, 'timestamp': '2025-09-30 23:11:38.589829', 'step': 6735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.665298', 'step': 6735, 'epoch': 3} {'type': 'loss', 'content': 0.0014226734638214111, 'timestamp': '2025-09-30 23:11:38.680097', 'step': 6736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.737717', 'step': 6736, 'epoch': 3} {'type': 'loss', 'content': 0.009812797419726849, 'timestamp': '2025-09-30 23:11:38.741033', 'step': 6737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.809094', 'step': 6737, 'epoch': 3} {'type': 'loss', 'content': 0.0001862764183897525, 'timestamp': '2025-09-30 23:11:38.815504', 'step': 6738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:38.880753', 'step': 6738, 'epoch': 3} {'type': 'loss', 'content': 7.81866765464656e-05, 'timestamp': '2025-09-30 23:11:38.886119', 'step': 6739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:38.946048', 'step': 6739, 'epoch': 3} {'type': 'loss', 'content': 0.00043040228774771094, 'timestamp': '2025-09-30 23:11:38.954027', 'step': 6740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.014204', 'step': 6740, 'epoch': 3} {'type': 'loss', 'content': 0.0010907779214903712, 'timestamp': '2025-09-30 23:11:39.021536', 'step': 6741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.098254', 'step': 6741, 'epoch': 3} {'type': 'loss', 'content': 0.004460308235138655, 'timestamp': '2025-09-30 23:11:39.106566', 'step': 6742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.169878', 'step': 6742, 'epoch': 3} {'type': 'loss', 'content': 0.00019648412126116455, 'timestamp': '2025-09-30 23:11:39.174262', 'step': 6743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:39.234278', 'step': 6743, 'epoch': 3} {'type': 'loss', 'content': 0.00030613350099883974, 'timestamp': '2025-09-30 23:11:39.243743', 'step': 6744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:39.304137', 'step': 6744, 'epoch': 3} {'type': 'loss', 'content': 0.0003091927501372993, 'timestamp': '2025-09-30 23:11:39.308326', 'step': 6745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.370426', 'step': 6745, 'epoch': 3} {'type': 'loss', 'content': 0.00297992629930377, 'timestamp': '2025-09-30 23:11:39.382720', 'step': 6746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:39.455577', 'step': 6746, 'epoch': 3} {'type': 'loss', 'content': 0.0016171453753486276, 'timestamp': '2025-09-30 23:11:39.458647', 'step': 6747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:39.533759', 'step': 6747, 'epoch': 3} {'type': 'loss', 'content': 0.0001314938854193315, 'timestamp': '2025-09-30 23:11:39.542366', 'step': 6748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.606713', 'step': 6748, 'epoch': 3} {'type': 'loss', 'content': 8.076961239567026e-05, 'timestamp': '2025-09-30 23:11:39.612866', 'step': 6749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.667675', 'step': 6749, 'epoch': 3} {'type': 'loss', 'content': 0.0001341834431514144, 'timestamp': '2025-09-30 23:11:39.672395', 'step': 6750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:39.732237', 'step': 6750, 'epoch': 3} {'type': 'loss', 'content': 0.00036297336919233203, 'timestamp': '2025-09-30 23:11:39.736439', 'step': 6751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.803010', 'step': 6751, 'epoch': 3} {'type': 'loss', 'content': 0.00018011641805060208, 'timestamp': '2025-09-30 23:11:39.811810', 'step': 6752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.870041', 'step': 6752, 'epoch': 3} {'type': 'loss', 'content': 0.0060573057271540165, 'timestamp': '2025-09-30 23:11:39.872854', 'step': 6753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.931304', 'step': 6753, 'epoch': 3} {'type': 'loss', 'content': 2.176724410674069e-05, 'timestamp': '2025-09-30 23:11:39.933990', 'step': 6754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:39.993912', 'step': 6754, 'epoch': 3} {'type': 'loss', 'content': 0.00017169279453810304, 'timestamp': '2025-09-30 23:11:39.997999', 'step': 6755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.057037', 'step': 6755, 'epoch': 3} {'type': 'loss', 'content': 0.0002193172840634361, 'timestamp': '2025-09-30 23:11:40.066116', 'step': 6756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.128378', 'step': 6756, 'epoch': 3} {'type': 'loss', 'content': 0.001276761176995933, 'timestamp': '2025-09-30 23:11:40.132648', 'step': 6757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.196581', 'step': 6757, 'epoch': 3} {'type': 'loss', 'content': 0.00020613193919416517, 'timestamp': '2025-09-30 23:11:40.203718', 'step': 6758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.262157', 'step': 6758, 'epoch': 3} {'type': 'loss', 'content': 0.0007613040506839752, 'timestamp': '2025-09-30 23:11:40.268940', 'step': 6759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:40.330203', 'step': 6759, 'epoch': 3} {'type': 'loss', 'content': 0.0074094198644161224, 'timestamp': '2025-09-30 23:11:40.336801', 'step': 6760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.396434', 'step': 6760, 'epoch': 3} {'type': 'loss', 'content': 0.003804705571383238, 'timestamp': '2025-09-30 23:11:40.399067', 'step': 6761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.457311', 'step': 6761, 'epoch': 3} {'type': 'loss', 'content': 0.018083740025758743, 'timestamp': '2025-09-30 23:11:40.462194', 'step': 6762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:40.525305', 'step': 6762, 'epoch': 3} {'type': 'loss', 'content': 0.0043103969655931, 'timestamp': '2025-09-30 23:11:40.528394', 'step': 6763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.587519', 'step': 6763, 'epoch': 3} {'type': 'loss', 'content': 0.0023545653093606234, 'timestamp': '2025-09-30 23:11:40.593612', 'step': 6764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.650536', 'step': 6764, 'epoch': 3} {'type': 'loss', 'content': 9.620614582672715e-05, 'timestamp': '2025-09-30 23:11:40.656252', 'step': 6765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.720766', 'step': 6765, 'epoch': 3} {'type': 'loss', 'content': 0.002702414756640792, 'timestamp': '2025-09-30 23:11:40.725913', 'step': 6766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.789965', 'step': 6766, 'epoch': 3} {'type': 'loss', 'content': 6.96635979693383e-05, 'timestamp': '2025-09-30 23:11:40.793675', 'step': 6767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.852233', 'step': 6767, 'epoch': 3} {'type': 'loss', 'content': 0.0012036676052957773, 'timestamp': '2025-09-30 23:11:40.860165', 'step': 6768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.921359', 'step': 6768, 'epoch': 3} {'type': 'loss', 'content': 0.001155877485871315, 'timestamp': '2025-09-30 23:11:40.929876', 'step': 6769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:40.992946', 'step': 6769, 'epoch': 3} {'type': 'loss', 'content': 0.002082414459437132, 'timestamp': '2025-09-30 23:11:40.997308', 'step': 6770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:41.059421', 'step': 6770, 'epoch': 3} {'type': 'loss', 'content': 0.00012068292562616989, 'timestamp': '2025-09-30 23:11:41.063160', 'step': 6771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.124562', 'step': 6771, 'epoch': 3} {'type': 'loss', 'content': 0.009071514941751957, 'timestamp': '2025-09-30 23:11:41.137505', 'step': 6772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.212729', 'step': 6772, 'epoch': 3} {'type': 'loss', 'content': 0.01102709211409092, 'timestamp': '2025-09-30 23:11:41.220070', 'step': 6773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.282661', 'step': 6773, 'epoch': 3} {'type': 'loss', 'content': 0.0004295706457924098, 'timestamp': '2025-09-30 23:11:41.287999', 'step': 6774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.351630', 'step': 6774, 'epoch': 3} {'type': 'loss', 'content': 4.2372270399937406e-05, 'timestamp': '2025-09-30 23:11:41.358611', 'step': 6775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:41.422850', 'step': 6775, 'epoch': 3} {'type': 'loss', 'content': 0.0028095135930925608, 'timestamp': '2025-09-30 23:11:41.430262', 'step': 6776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.486482', 'step': 6776, 'epoch': 3} {'type': 'loss', 'content': 0.0002779097412712872, 'timestamp': '2025-09-30 23:11:41.490116', 'step': 6777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:41.546802', 'step': 6777, 'epoch': 3} {'type': 'loss', 'content': 0.006724574137479067, 'timestamp': '2025-09-30 23:11:41.550320', 'step': 6778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.640519', 'step': 6778, 'epoch': 3} {'type': 'loss', 'content': 0.0003156990569550544, 'timestamp': '2025-09-30 23:11:41.648970', 'step': 6779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.708807', 'step': 6779, 'epoch': 3} {'type': 'loss', 'content': 7.009017281234264e-05, 'timestamp': '2025-09-30 23:11:41.719160', 'step': 6780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:41.781721', 'step': 6780, 'epoch': 3} {'type': 'loss', 'content': 0.0013980717631056905, 'timestamp': '2025-09-30 23:11:41.786362', 'step': 6781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:41.856905', 'step': 6781, 'epoch': 3} {'type': 'loss', 'content': 0.0024696162436157465, 'timestamp': '2025-09-30 23:11:41.861028', 'step': 6782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:41.921484', 'step': 6782, 'epoch': 3} {'type': 'loss', 'content': 0.00639027776196599, 'timestamp': '2025-09-30 23:11:41.928393', 'step': 6783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.001703', 'step': 6783, 'epoch': 3} {'type': 'loss', 'content': 1.964173497981392e-05, 'timestamp': '2025-09-30 23:11:42.010752', 'step': 6784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.065968', 'step': 6784, 'epoch': 3} {'type': 'loss', 'content': 0.012113398872315884, 'timestamp': '2025-09-30 23:11:42.069897', 'step': 6785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.132604', 'step': 6785, 'epoch': 3} {'type': 'loss', 'content': 4.136071584071033e-05, 'timestamp': '2025-09-30 23:11:42.135649', 'step': 6786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.193267', 'step': 6786, 'epoch': 3} {'type': 'loss', 'content': 9.29881352931261e-05, 'timestamp': '2025-09-30 23:11:42.195825', 'step': 6787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.259015', 'step': 6787, 'epoch': 3} {'type': 'loss', 'content': 0.011588326655328274, 'timestamp': '2025-09-30 23:11:42.268923', 'step': 6788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.346513', 'step': 6788, 'epoch': 3} {'type': 'loss', 'content': 3.208723137504421e-05, 'timestamp': '2025-09-30 23:11:42.350084', 'step': 6789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:42.407295', 'step': 6789, 'epoch': 3} {'type': 'loss', 'content': 5.869276719749905e-05, 'timestamp': '2025-09-30 23:11:42.413030', 'step': 6790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.472681', 'step': 6790, 'epoch': 3} {'type': 'loss', 'content': 0.007766692899167538, 'timestamp': '2025-09-30 23:11:42.476009', 'step': 6791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.560885', 'step': 6791, 'epoch': 3} {'type': 'loss', 'content': 4.2955543904099613e-05, 'timestamp': '2025-09-30 23:11:42.571863', 'step': 6792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.636230', 'step': 6792, 'epoch': 3} {'type': 'loss', 'content': 0.00168079964350909, 'timestamp': '2025-09-30 23:11:42.639248', 'step': 6793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:42.696099', 'step': 6793, 'epoch': 3} {'type': 'loss', 'content': 0.00021696535986848176, 'timestamp': '2025-09-30 23:11:42.700050', 'step': 6794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.757974', 'step': 6794, 'epoch': 3} {'type': 'loss', 'content': 0.003924703225493431, 'timestamp': '2025-09-30 23:11:42.760600', 'step': 6795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.815897', 'step': 6795, 'epoch': 3} {'type': 'loss', 'content': 0.017282264307141304, 'timestamp': '2025-09-30 23:11:42.823365', 'step': 6796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.881741', 'step': 6796, 'epoch': 3} {'type': 'loss', 'content': 3.318950257380493e-05, 'timestamp': '2025-09-30 23:11:42.884435', 'step': 6797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:42.941234', 'step': 6797, 'epoch': 3} {'type': 'loss', 'content': 0.00023670887458138168, 'timestamp': '2025-09-30 23:11:42.945387', 'step': 6798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.002839', 'step': 6798, 'epoch': 3} {'type': 'loss', 'content': 0.0006344518042169511, 'timestamp': '2025-09-30 23:11:43.006154', 'step': 6799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.063316', 'step': 6799, 'epoch': 3} {'type': 'loss', 'content': 0.0019264478469267488, 'timestamp': '2025-09-30 23:11:43.070749', 'step': 6800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:43.129042', 'step': 6800, 'epoch': 3} {'type': 'loss', 'content': 0.007947307080030441, 'timestamp': '2025-09-30 23:11:43.133359', 'step': 6801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.193976', 'step': 6801, 'epoch': 3} {'type': 'loss', 'content': 0.06043774262070656, 'timestamp': '2025-09-30 23:11:43.198983', 'step': 6802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.265291', 'step': 6802, 'epoch': 3} {'type': 'loss', 'content': 0.015133721753954887, 'timestamp': '2025-09-30 23:11:43.272220', 'step': 6803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.336804', 'step': 6803, 'epoch': 3} {'type': 'loss', 'content': 0.0002316362370038405, 'timestamp': '2025-09-30 23:11:43.342629', 'step': 6804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.400612', 'step': 6804, 'epoch': 3} {'type': 'loss', 'content': 0.000709126761648804, 'timestamp': '2025-09-30 23:11:43.404085', 'step': 6805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.462897', 'step': 6805, 'epoch': 3} {'type': 'loss', 'content': 0.005197132937610149, 'timestamp': '2025-09-30 23:11:43.466796', 'step': 6806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.530312', 'step': 6806, 'epoch': 3} {'type': 'loss', 'content': 0.0007826205110177398, 'timestamp': '2025-09-30 23:11:43.534386', 'step': 6807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.591317', 'step': 6807, 'epoch': 3} {'type': 'loss', 'content': 0.012917207553982735, 'timestamp': '2025-09-30 23:11:43.602086', 'step': 6808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.660126', 'step': 6808, 'epoch': 3} {'type': 'loss', 'content': 6.596314051421359e-05, 'timestamp': '2025-09-30 23:11:43.664289', 'step': 6809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.723462', 'step': 6809, 'epoch': 3} {'type': 'loss', 'content': 0.00011809384159278125, 'timestamp': '2025-09-30 23:11:43.725564', 'step': 6810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.782283', 'step': 6810, 'epoch': 3} {'type': 'loss', 'content': 0.027435805648565292, 'timestamp': '2025-09-30 23:11:43.784579', 'step': 6811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:43.840097', 'step': 6811, 'epoch': 3} {'type': 'loss', 'content': 0.0031522440258413553, 'timestamp': '2025-09-30 23:11:43.847306', 'step': 6812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:43.908417', 'step': 6812, 'epoch': 3} {'type': 'loss', 'content': 0.0016565370606258512, 'timestamp': '2025-09-30 23:11:43.912044', 'step': 6813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:43.972003', 'step': 6813, 'epoch': 3} {'type': 'loss', 'content': 0.05272560194134712, 'timestamp': '2025-09-30 23:11:43.975474', 'step': 6814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:44.035403', 'step': 6814, 'epoch': 3} {'type': 'loss', 'content': 0.011821037158370018, 'timestamp': '2025-09-30 23:11:44.037969', 'step': 6815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:44.095863', 'step': 6815, 'epoch': 3} {'type': 'loss', 'content': 0.004194310400635004, 'timestamp': '2025-09-30 23:11:44.102210', 'step': 6816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:44.160955', 'step': 6816, 'epoch': 3} {'type': 'loss', 'content': 0.0001092040547518991, 'timestamp': '2025-09-30 23:11:44.165175', 'step': 6817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:44.225079', 'step': 6817, 'epoch': 3} {'type': 'loss', 'content': 4.315420301281847e-05, 'timestamp': '2025-09-30 23:11:44.228839', 'step': 6818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:44.295377', 'step': 6818, 'epoch': 3} {'type': 'loss', 'content': 0.006588122341781855, 'timestamp': '2025-09-30 23:11:44.300895', 'step': 6819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:44.359285', 'step': 6819, 'epoch': 3} {'type': 'loss', 'content': 0.0003640396462287754, 'timestamp': '2025-09-30 23:11:44.366120', 'step': 6820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:44.433222', 'step': 6820, 'epoch': 3} {'type': 'loss', 'content': 0.00024130925885401666, 'timestamp': '2025-09-30 23:11:44.437211', 'step': 6821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:44.495254', 'step': 6821, 'epoch': 3} {'type': 'loss', 'content': 0.007281295955181122, 'timestamp': '2025-09-30 23:11:44.497999', 'step': 6822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:44.563614', 'step': 6822, 'epoch': 3} {'type': 'loss', 'content': 0.0047638388350605965, 'timestamp': '2025-09-30 23:11:44.568798', 'step': 6823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:44.634873', 'step': 6823, 'epoch': 3} {'type': 'loss', 'content': 0.006902820430696011, 'timestamp': '2025-09-30 23:11:44.646410', 'step': 6824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:44.708445', 'step': 6824, 'epoch': 3} {'type': 'loss', 'content': 0.0012421088758856058, 'timestamp': '2025-09-30 23:11:44.710771', 'step': 6825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:44.774934', 'step': 6825, 'epoch': 3} {'type': 'loss', 'content': 0.0027636531740427017, 'timestamp': '2025-09-30 23:11:44.777850', 'step': 6826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:44.840977', 'step': 6826, 'epoch': 3} {'type': 'loss', 'content': 0.018319416791200638, 'timestamp': '2025-09-30 23:11:44.847795', 'step': 6827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:44.909618', 'step': 6827, 'epoch': 3} {'type': 'loss', 'content': 0.03714371100068092, 'timestamp': '2025-09-30 23:11:44.921702', 'step': 6828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:45.005002', 'step': 6828, 'epoch': 3} {'type': 'loss', 'content': 0.005999617278575897, 'timestamp': '2025-09-30 23:11:45.008367', 'step': 6829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.083164', 'step': 6829, 'epoch': 3} {'type': 'loss', 'content': 0.00015876647375989705, 'timestamp': '2025-09-30 23:11:45.085925', 'step': 6830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:45.156935', 'step': 6830, 'epoch': 3} {'type': 'loss', 'content': 8.918536332203075e-05, 'timestamp': '2025-09-30 23:11:45.162445', 'step': 6831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.227423', 'step': 6831, 'epoch': 3} {'type': 'loss', 'content': 0.047573987394571304, 'timestamp': '2025-09-30 23:11:45.235687', 'step': 6832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.299730', 'step': 6832, 'epoch': 3} {'type': 'loss', 'content': 0.00691990414634347, 'timestamp': '2025-09-30 23:11:45.305455', 'step': 6833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.370366', 'step': 6833, 'epoch': 3} {'type': 'loss', 'content': 8.439691009698436e-05, 'timestamp': '2025-09-30 23:11:45.377904', 'step': 6834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.445060', 'step': 6834, 'epoch': 3} {'type': 'loss', 'content': 0.00035413276054896414, 'timestamp': '2025-09-30 23:11:45.450575', 'step': 6835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:45.516615', 'step': 6835, 'epoch': 3} {'type': 'loss', 'content': 0.00012050955410813913, 'timestamp': '2025-09-30 23:11:45.525564', 'step': 6836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.590206', 'step': 6836, 'epoch': 3} {'type': 'loss', 'content': 0.0002901832922361791, 'timestamp': '2025-09-30 23:11:45.592084', 'step': 6837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.659811', 'step': 6837, 'epoch': 3} {'type': 'loss', 'content': 0.0006739121163263917, 'timestamp': '2025-09-30 23:11:45.662812', 'step': 6838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.734282', 'step': 6838, 'epoch': 3} {'type': 'loss', 'content': 0.06276438385248184, 'timestamp': '2025-09-30 23:11:45.740648', 'step': 6839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:45.803278', 'step': 6839, 'epoch': 3} {'type': 'loss', 'content': 0.00011810970318038017, 'timestamp': '2025-09-30 23:11:45.809560', 'step': 6840, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:11:50.339753', 'step': 6840, 'epoch': 3} {'type': 'pplx', 'content': 7611908.296985189, 'timestamp': '2025-09-30 23:11:50.344011', 'step': 6840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.401812', 'step': 6840, 'epoch': 3} {'type': 'loss', 'content': 0.004532340448349714, 'timestamp': '2025-09-30 23:11:50.405424', 'step': 6841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.462810', 'step': 6841, 'epoch': 3} {'type': 'loss', 'content': 0.0009978300658985972, 'timestamp': '2025-09-30 23:11:50.467502', 'step': 6842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.527495', 'step': 6842, 'epoch': 3} {'type': 'loss', 'content': 0.010533260181546211, 'timestamp': '2025-09-30 23:11:50.536292', 'step': 6843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.615550', 'step': 6843, 'epoch': 3} {'type': 'loss', 'content': 0.002174974884837866, 'timestamp': '2025-09-30 23:11:50.623964', 'step': 6844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.686329', 'step': 6844, 'epoch': 3} {'type': 'loss', 'content': 0.00044047116534784436, 'timestamp': '2025-09-30 23:11:50.689598', 'step': 6845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.751305', 'step': 6845, 'epoch': 3} {'type': 'loss', 'content': 0.0013416383881121874, 'timestamp': '2025-09-30 23:11:50.754606', 'step': 6846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.814030', 'step': 6846, 'epoch': 3} {'type': 'loss', 'content': 0.007906908169388771, 'timestamp': '2025-09-30 23:11:50.817745', 'step': 6847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:50.876258', 'step': 6847, 'epoch': 3} {'type': 'loss', 'content': 0.004590172786265612, 'timestamp': '2025-09-30 23:11:50.884351', 'step': 6848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:50.956998', 'step': 6848, 'epoch': 3} {'type': 'loss', 'content': 0.00018609190010465682, 'timestamp': '2025-09-30 23:11:50.964839', 'step': 6849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.027319', 'step': 6849, 'epoch': 3} {'type': 'loss', 'content': 0.0008428186411038041, 'timestamp': '2025-09-30 23:11:51.032269', 'step': 6850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.092371', 'step': 6850, 'epoch': 3} {'type': 'loss', 'content': 0.0033436056692153215, 'timestamp': '2025-09-30 23:11:51.096351', 'step': 6851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.155127', 'step': 6851, 'epoch': 3} {'type': 'loss', 'content': 7.662351708859205e-05, 'timestamp': '2025-09-30 23:11:51.164025', 'step': 6852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:51.230730', 'step': 6852, 'epoch': 3} {'type': 'loss', 'content': 0.0009683701209723949, 'timestamp': '2025-09-30 23:11:51.235954', 'step': 6853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:51.297232', 'step': 6853, 'epoch': 3} {'type': 'loss', 'content': 0.023413192480802536, 'timestamp': '2025-09-30 23:11:51.300431', 'step': 6854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.356815', 'step': 6854, 'epoch': 3} {'type': 'loss', 'content': 0.0002841181121766567, 'timestamp': '2025-09-30 23:11:51.360824', 'step': 6855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.417050', 'step': 6855, 'epoch': 3} {'type': 'loss', 'content': 0.0011757240863516927, 'timestamp': '2025-09-30 23:11:51.428772', 'step': 6856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.502912', 'step': 6856, 'epoch': 3} {'type': 'loss', 'content': 0.00013025577936787158, 'timestamp': '2025-09-30 23:11:51.507443', 'step': 6857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.565697', 'step': 6857, 'epoch': 3} {'type': 'loss', 'content': 0.0015747741563245654, 'timestamp': '2025-09-30 23:11:51.570604', 'step': 6858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.630017', 'step': 6858, 'epoch': 3} {'type': 'loss', 'content': 0.0004856027662754059, 'timestamp': '2025-09-30 23:11:51.636132', 'step': 6859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.703833', 'step': 6859, 'epoch': 3} {'type': 'loss', 'content': 6.470674270531163e-05, 'timestamp': '2025-09-30 23:11:51.711036', 'step': 6860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.768054', 'step': 6860, 'epoch': 3} {'type': 'loss', 'content': 0.0004334398836363107, 'timestamp': '2025-09-30 23:11:51.771399', 'step': 6861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.828495', 'step': 6861, 'epoch': 3} {'type': 'loss', 'content': 0.001652287319302559, 'timestamp': '2025-09-30 23:11:51.831663', 'step': 6862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:51.905856', 'step': 6862, 'epoch': 3} {'type': 'loss', 'content': 0.0007588401203975081, 'timestamp': '2025-09-30 23:11:51.910896', 'step': 6863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:51.971833', 'step': 6863, 'epoch': 3} {'type': 'loss', 'content': 0.0001284613972529769, 'timestamp': '2025-09-30 23:11:51.978072', 'step': 6864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.033631', 'step': 6864, 'epoch': 3} {'type': 'loss', 'content': 0.0003502220497466624, 'timestamp': '2025-09-30 23:11:52.039063', 'step': 6865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.100126', 'step': 6865, 'epoch': 3} {'type': 'loss', 'content': 0.0008780598291195929, 'timestamp': '2025-09-30 23:11:52.109351', 'step': 6866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.173810', 'step': 6866, 'epoch': 3} {'type': 'loss', 'content': 7.667751924600452e-05, 'timestamp': '2025-09-30 23:11:52.177483', 'step': 6867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.234639', 'step': 6867, 'epoch': 3} {'type': 'loss', 'content': 0.000828067131806165, 'timestamp': '2025-09-30 23:11:52.243295', 'step': 6868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.300898', 'step': 6868, 'epoch': 3} {'type': 'loss', 'content': 0.002012557815760374, 'timestamp': '2025-09-30 23:11:52.308445', 'step': 6869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:52.378609', 'step': 6869, 'epoch': 3} {'type': 'loss', 'content': 9.223334200214595e-05, 'timestamp': '2025-09-30 23:11:52.388942', 'step': 6870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.452674', 'step': 6870, 'epoch': 3} {'type': 'loss', 'content': 0.00032713127438910306, 'timestamp': '2025-09-30 23:11:52.455126', 'step': 6871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.513364', 'step': 6871, 'epoch': 3} {'type': 'loss', 'content': 0.00042487209429964423, 'timestamp': '2025-09-30 23:11:52.521838', 'step': 6872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.581009', 'step': 6872, 'epoch': 3} {'type': 'loss', 'content': 0.00019686229643411934, 'timestamp': '2025-09-30 23:11:52.592048', 'step': 6873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:52.656820', 'step': 6873, 'epoch': 3} {'type': 'loss', 'content': 0.01310041081160307, 'timestamp': '2025-09-30 23:11:52.669746', 'step': 6874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.738793', 'step': 6874, 'epoch': 3} {'type': 'loss', 'content': 0.0004498482740018517, 'timestamp': '2025-09-30 23:11:52.742412', 'step': 6875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.799508', 'step': 6875, 'epoch': 3} {'type': 'loss', 'content': 0.006197819951921701, 'timestamp': '2025-09-30 23:11:52.806901', 'step': 6876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:52.862626', 'step': 6876, 'epoch': 3} {'type': 'loss', 'content': 0.012539928779006004, 'timestamp': '2025-09-30 23:11:52.867025', 'step': 6877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:52.931031', 'step': 6877, 'epoch': 3} {'type': 'loss', 'content': 0.029754996299743652, 'timestamp': '2025-09-30 23:11:52.940438', 'step': 6878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:53.013755', 'step': 6878, 'epoch': 3} {'type': 'loss', 'content': 0.0002052679192274809, 'timestamp': '2025-09-30 23:11:53.019366', 'step': 6879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.077081', 'step': 6879, 'epoch': 3} {'type': 'loss', 'content': 9.285889973398298e-05, 'timestamp': '2025-09-30 23:11:53.086696', 'step': 6880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.145811', 'step': 6880, 'epoch': 3} {'type': 'loss', 'content': 0.0015224531525745988, 'timestamp': '2025-09-30 23:11:53.153256', 'step': 6881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:53.211664', 'step': 6881, 'epoch': 3} {'type': 'loss', 'content': 0.002446553437039256, 'timestamp': '2025-09-30 23:11:53.218180', 'step': 6882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:53.281741', 'step': 6882, 'epoch': 3} {'type': 'loss', 'content': 0.00011263950727880001, 'timestamp': '2025-09-30 23:11:53.286874', 'step': 6883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.349277', 'step': 6883, 'epoch': 3} {'type': 'loss', 'content': 0.0003298702067695558, 'timestamp': '2025-09-30 23:11:53.360782', 'step': 6884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:53.419751', 'step': 6884, 'epoch': 3} {'type': 'loss', 'content': 0.00555823277682066, 'timestamp': '2025-09-30 23:11:53.424165', 'step': 6885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:53.481973', 'step': 6885, 'epoch': 3} {'type': 'loss', 'content': 0.008687907829880714, 'timestamp': '2025-09-30 23:11:53.488377', 'step': 6886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.554908', 'step': 6886, 'epoch': 3} {'type': 'loss', 'content': 4.231333150528371e-05, 'timestamp': '2025-09-30 23:11:53.564873', 'step': 6887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.637124', 'step': 6887, 'epoch': 3} {'type': 'loss', 'content': 0.00013470163685269654, 'timestamp': '2025-09-30 23:11:53.646550', 'step': 6888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.711137', 'step': 6888, 'epoch': 3} {'type': 'loss', 'content': 0.001359384274110198, 'timestamp': '2025-09-30 23:11:53.719110', 'step': 6889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.784197', 'step': 6889, 'epoch': 3} {'type': 'loss', 'content': 0.00017671413661446422, 'timestamp': '2025-09-30 23:11:53.792794', 'step': 6890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.863388', 'step': 6890, 'epoch': 3} {'type': 'loss', 'content': 7.045392703730613e-05, 'timestamp': '2025-09-30 23:11:53.872269', 'step': 6891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:53.941008', 'step': 6891, 'epoch': 3} {'type': 'loss', 'content': 0.0004550036392174661, 'timestamp': '2025-09-30 23:11:53.947356', 'step': 6892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.011468', 'step': 6892, 'epoch': 3} {'type': 'loss', 'content': 0.01759733073413372, 'timestamp': '2025-09-30 23:11:54.015013', 'step': 6893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.073416', 'step': 6893, 'epoch': 3} {'type': 'loss', 'content': 0.009066975675523281, 'timestamp': '2025-09-30 23:11:54.079499', 'step': 6894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.148013', 'step': 6894, 'epoch': 3} {'type': 'loss', 'content': 0.0008118936093524098, 'timestamp': '2025-09-30 23:11:54.161173', 'step': 6895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.234288', 'step': 6895, 'epoch': 3} {'type': 'loss', 'content': 0.005664784926921129, 'timestamp': '2025-09-30 23:11:54.247007', 'step': 6896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.305088', 'step': 6896, 'epoch': 3} {'type': 'loss', 'content': 0.0039633652195334435, 'timestamp': '2025-09-30 23:11:54.312132', 'step': 6897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.379957', 'step': 6897, 'epoch': 3} {'type': 'loss', 'content': 8.56818733154796e-05, 'timestamp': '2025-09-30 23:11:54.391886', 'step': 6898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.454468', 'step': 6898, 'epoch': 3} {'type': 'loss', 'content': 0.002793683437630534, 'timestamp': '2025-09-30 23:11:54.467974', 'step': 6899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.537764', 'step': 6899, 'epoch': 3} {'type': 'loss', 'content': 0.0012561719631776214, 'timestamp': '2025-09-30 23:11:54.544624', 'step': 6900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.608932', 'step': 6900, 'epoch': 3} {'type': 'loss', 'content': 0.0001020448180497624, 'timestamp': '2025-09-30 23:11:54.612026', 'step': 6901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.671161', 'step': 6901, 'epoch': 3} {'type': 'loss', 'content': 3.227670458727516e-05, 'timestamp': '2025-09-30 23:11:54.673891', 'step': 6902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.730820', 'step': 6902, 'epoch': 3} {'type': 'loss', 'content': 0.023259475827217102, 'timestamp': '2025-09-30 23:11:54.733544', 'step': 6903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:54.789601', 'step': 6903, 'epoch': 3} {'type': 'loss', 'content': 0.00528291892260313, 'timestamp': '2025-09-30 23:11:54.797481', 'step': 6904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.867008', 'step': 6904, 'epoch': 3} {'type': 'loss', 'content': 0.00027771852910518646, 'timestamp': '2025-09-30 23:11:54.888488', 'step': 6905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:54.962741', 'step': 6905, 'epoch': 3} {'type': 'loss', 'content': 0.005041755735874176, 'timestamp': '2025-09-30 23:11:54.974297', 'step': 6906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.041264', 'step': 6906, 'epoch': 3} {'type': 'loss', 'content': 0.0009804225992411375, 'timestamp': '2025-09-30 23:11:55.056924', 'step': 6907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.145052', 'step': 6907, 'epoch': 3} {'type': 'loss', 'content': 0.0034319348633289337, 'timestamp': '2025-09-30 23:11:55.158756', 'step': 6908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.214618', 'step': 6908, 'epoch': 3} {'type': 'loss', 'content': 0.00024467436014674604, 'timestamp': '2025-09-30 23:11:55.219139', 'step': 6909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:55.288520', 'step': 6909, 'epoch': 3} {'type': 'loss', 'content': 0.00014206304331310093, 'timestamp': '2025-09-30 23:11:55.302195', 'step': 6910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.389279', 'step': 6910, 'epoch': 3} {'type': 'loss', 'content': 0.00041932021849788725, 'timestamp': '2025-09-30 23:11:55.403026', 'step': 6911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.489033', 'step': 6911, 'epoch': 3} {'type': 'loss', 'content': 0.00010660752013791353, 'timestamp': '2025-09-30 23:11:55.506143', 'step': 6912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:55.571008', 'step': 6912, 'epoch': 3} {'type': 'loss', 'content': 0.030470672994852066, 'timestamp': '2025-09-30 23:11:55.575843', 'step': 6913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.641115', 'step': 6913, 'epoch': 3} {'type': 'loss', 'content': 0.001020606723614037, 'timestamp': '2025-09-30 23:11:55.654682', 'step': 6914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.723631', 'step': 6914, 'epoch': 3} {'type': 'loss', 'content': 0.013840220868587494, 'timestamp': '2025-09-30 23:11:55.737551', 'step': 6915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:55.798900', 'step': 6915, 'epoch': 3} {'type': 'loss', 'content': 0.00013402184413280338, 'timestamp': '2025-09-30 23:11:55.805987', 'step': 6916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.886812', 'step': 6916, 'epoch': 3} {'type': 'loss', 'content': 0.0007506259717047215, 'timestamp': '2025-09-30 23:11:55.890601', 'step': 6917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:55.954489', 'step': 6917, 'epoch': 3} {'type': 'loss', 'content': 0.004840568173676729, 'timestamp': '2025-09-30 23:11:55.958448', 'step': 6918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:56.034152', 'step': 6918, 'epoch': 3} {'type': 'loss', 'content': 0.00027063366724178195, 'timestamp': '2025-09-30 23:11:56.045863', 'step': 6919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:56.125471', 'step': 6919, 'epoch': 3} {'type': 'loss', 'content': 0.003338398179039359, 'timestamp': '2025-09-30 23:11:56.132535', 'step': 6920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:56.206001', 'step': 6920, 'epoch': 3} {'type': 'loss', 'content': 0.0020438958890736103, 'timestamp': '2025-09-30 23:11:56.209852', 'step': 6921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:56.291772', 'step': 6921, 'epoch': 3} {'type': 'loss', 'content': 0.0018059142166748643, 'timestamp': '2025-09-30 23:11:56.302820', 'step': 6922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:56.380157', 'step': 6922, 'epoch': 3} {'type': 'loss', 'content': 0.00020029039296787232, 'timestamp': '2025-09-30 23:11:56.394556', 'step': 6923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:56.507792', 'step': 6923, 'epoch': 3} {'type': 'loss', 'content': 0.00027166149811819196, 'timestamp': '2025-09-30 23:11:56.514364', 'step': 6924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:56.603092', 'step': 6924, 'epoch': 3} {'type': 'loss', 'content': 0.0006502497126348317, 'timestamp': '2025-09-30 23:11:56.607718', 'step': 6925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:56.676183', 'step': 6925, 'epoch': 3} {'type': 'loss', 'content': 0.0005269968532957137, 'timestamp': '2025-09-30 23:11:56.691157', 'step': 6926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:56.782719', 'step': 6926, 'epoch': 3} {'type': 'loss', 'content': 4.931294097332284e-05, 'timestamp': '2025-09-30 23:11:56.797442', 'step': 6927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:56.883435', 'step': 6927, 'epoch': 3} {'type': 'loss', 'content': 9.214420424541458e-05, 'timestamp': '2025-09-30 23:11:56.900724', 'step': 6928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:56.958368', 'step': 6928, 'epoch': 3} {'type': 'loss', 'content': 0.010324268601834774, 'timestamp': '2025-09-30 23:11:56.966841', 'step': 6929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:57.043707', 'step': 6929, 'epoch': 3} {'type': 'loss', 'content': 0.00018997106235474348, 'timestamp': '2025-09-30 23:11:57.051645', 'step': 6930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:57.131826', 'step': 6930, 'epoch': 3} {'type': 'loss', 'content': 0.00013255928934086114, 'timestamp': '2025-09-30 23:11:57.145692', 'step': 6931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:57.240314', 'step': 6931, 'epoch': 3} {'type': 'loss', 'content': 0.001153977238573134, 'timestamp': '2025-09-30 23:11:57.261559', 'step': 6932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:57.320739', 'step': 6932, 'epoch': 3} {'type': 'loss', 'content': 0.0002156884438591078, 'timestamp': '2025-09-30 23:11:57.337426', 'step': 6933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:57.452322', 'step': 6933, 'epoch': 3} {'type': 'loss', 'content': 0.0027296876069158316, 'timestamp': '2025-09-30 23:11:57.477058', 'step': 6934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:57.553344', 'step': 6934, 'epoch': 3} {'type': 'loss', 'content': 0.004167180974036455, 'timestamp': '2025-09-30 23:11:57.571348', 'step': 6935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:57.665178', 'step': 6935, 'epoch': 3} {'type': 'loss', 'content': 0.022071193903684616, 'timestamp': '2025-09-30 23:11:57.689278', 'step': 6936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:57.791770', 'step': 6936, 'epoch': 3} {'type': 'loss', 'content': 0.01314992643892765, 'timestamp': '2025-09-30 23:11:57.803305', 'step': 6937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:57.878591', 'step': 6937, 'epoch': 3} {'type': 'loss', 'content': 0.0004911072901450098, 'timestamp': '2025-09-30 23:11:57.884300', 'step': 6938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:57.952579', 'step': 6938, 'epoch': 3} {'type': 'loss', 'content': 0.00012242952652741224, 'timestamp': '2025-09-30 23:11:57.956036', 'step': 6939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.043922', 'step': 6939, 'epoch': 3} {'type': 'loss', 'content': 0.0015342843253165483, 'timestamp': '2025-09-30 23:11:58.061225', 'step': 6940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.141736', 'step': 6940, 'epoch': 3} {'type': 'loss', 'content': 5.006141873309389e-05, 'timestamp': '2025-09-30 23:11:58.147193', 'step': 6941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:11:58.210774', 'step': 6941, 'epoch': 3} {'type': 'loss', 'content': 0.00215044803917408, 'timestamp': '2025-09-30 23:11:58.215795', 'step': 6942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.304918', 'step': 6942, 'epoch': 3} {'type': 'loss', 'content': 0.00020475169003475457, 'timestamp': '2025-09-30 23:11:58.310019', 'step': 6943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.389005', 'step': 6943, 'epoch': 3} {'type': 'loss', 'content': 0.007731777615845203, 'timestamp': '2025-09-30 23:11:58.408694', 'step': 6944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.492333', 'step': 6944, 'epoch': 3} {'type': 'loss', 'content': 0.0003201710060238838, 'timestamp': '2025-09-30 23:11:58.501489', 'step': 6945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.579248', 'step': 6945, 'epoch': 3} {'type': 'loss', 'content': 0.0007197738741524518, 'timestamp': '2025-09-30 23:11:58.592780', 'step': 6946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:58.670318', 'step': 6946, 'epoch': 3} {'type': 'loss', 'content': 0.0001845260412665084, 'timestamp': '2025-09-30 23:11:58.673080', 'step': 6947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.746737', 'step': 6947, 'epoch': 3} {'type': 'loss', 'content': 0.0014841749798506498, 'timestamp': '2025-09-30 23:11:58.767215', 'step': 6948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.853436', 'step': 6948, 'epoch': 3} {'type': 'loss', 'content': 0.00047632193309254944, 'timestamp': '2025-09-30 23:11:58.866656', 'step': 6949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:58.944307', 'step': 6949, 'epoch': 3} {'type': 'loss', 'content': 0.00010867668606806546, 'timestamp': '2025-09-30 23:11:58.950248', 'step': 6950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.027274', 'step': 6950, 'epoch': 3} {'type': 'loss', 'content': 0.00011228422954445705, 'timestamp': '2025-09-30 23:11:59.042230', 'step': 6951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:59.126092', 'step': 6951, 'epoch': 3} {'type': 'loss', 'content': 0.008417649194598198, 'timestamp': '2025-09-30 23:11:59.134604', 'step': 6952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.194338', 'step': 6952, 'epoch': 3} {'type': 'loss', 'content': 8.731234993319958e-05, 'timestamp': '2025-09-30 23:11:59.210820', 'step': 6953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.292498', 'step': 6953, 'epoch': 3} {'type': 'loss', 'content': 2.286789458594285e-05, 'timestamp': '2025-09-30 23:11:59.305053', 'step': 6954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.388129', 'step': 6954, 'epoch': 3} {'type': 'loss', 'content': 0.00022882113989908248, 'timestamp': '2025-09-30 23:11:59.403421', 'step': 6955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.484347', 'step': 6955, 'epoch': 3} {'type': 'loss', 'content': 0.0012946168426424265, 'timestamp': '2025-09-30 23:11:59.497054', 'step': 6956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:59.571708', 'step': 6956, 'epoch': 3} {'type': 'loss', 'content': 0.000905773660633713, 'timestamp': '2025-09-30 23:11:59.581916', 'step': 6957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.659821', 'step': 6957, 'epoch': 3} {'type': 'loss', 'content': 0.0009792859200388193, 'timestamp': '2025-09-30 23:11:59.663406', 'step': 6958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.746680', 'step': 6958, 'epoch': 3} {'type': 'loss', 'content': 4.554805127554573e-05, 'timestamp': '2025-09-30 23:11:59.759791', 'step': 6959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:11:59.841271', 'step': 6959, 'epoch': 3} {'type': 'loss', 'content': 5.786714973510243e-05, 'timestamp': '2025-09-30 23:11:59.849077', 'step': 6960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:11:59.905915', 'step': 6960, 'epoch': 3} {'type': 'loss', 'content': 0.018906647339463234, 'timestamp': '2025-09-30 23:11:59.909603', 'step': 6961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:11:59.980239', 'step': 6961, 'epoch': 3} {'type': 'loss', 'content': 0.0005307073006406426, 'timestamp': '2025-09-30 23:11:59.984890', 'step': 6962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.061318', 'step': 6962, 'epoch': 3} {'type': 'loss', 'content': 5.6793971452862024e-05, 'timestamp': '2025-09-30 23:12:00.066177', 'step': 6963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.155518', 'step': 6963, 'epoch': 3} {'type': 'loss', 'content': 0.00038179970579221845, 'timestamp': '2025-09-30 23:12:00.163759', 'step': 6964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:00.224644', 'step': 6964, 'epoch': 3} {'type': 'loss', 'content': 0.0002308934199390933, 'timestamp': '2025-09-30 23:12:00.227834', 'step': 6965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:00.285889', 'step': 6965, 'epoch': 3} {'type': 'loss', 'content': 0.00027862394927069545, 'timestamp': '2025-09-30 23:12:00.295293', 'step': 6966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.371554', 'step': 6966, 'epoch': 3} {'type': 'loss', 'content': 0.0009156878222711384, 'timestamp': '2025-09-30 23:12:00.381104', 'step': 6967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.451127', 'step': 6967, 'epoch': 3} {'type': 'loss', 'content': 0.0009196632890962064, 'timestamp': '2025-09-30 23:12:00.471966', 'step': 6968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:00.544650', 'step': 6968, 'epoch': 3} {'type': 'loss', 'content': 0.0006923266919329762, 'timestamp': '2025-09-30 23:12:00.564140', 'step': 6969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.643294', 'step': 6969, 'epoch': 3} {'type': 'loss', 'content': 0.012259126640856266, 'timestamp': '2025-09-30 23:12:00.651678', 'step': 6970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.729952', 'step': 6970, 'epoch': 3} {'type': 'loss', 'content': 0.0004427572130225599, 'timestamp': '2025-09-30 23:12:00.739168', 'step': 6971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.808952', 'step': 6971, 'epoch': 3} {'type': 'loss', 'content': 7.595335046062246e-05, 'timestamp': '2025-09-30 23:12:00.823522', 'step': 6972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.894782', 'step': 6972, 'epoch': 3} {'type': 'loss', 'content': 0.000780046742875129, 'timestamp': '2025-09-30 23:12:00.904267', 'step': 6973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:00.978085', 'step': 6973, 'epoch': 3} {'type': 'loss', 'content': 4.566404822980985e-05, 'timestamp': '2025-09-30 23:12:00.987923', 'step': 6974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:01.063813', 'step': 6974, 'epoch': 3} {'type': 'loss', 'content': 0.025266006588935852, 'timestamp': '2025-09-30 23:12:01.071890', 'step': 6975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:01.138327', 'step': 6975, 'epoch': 3} {'type': 'loss', 'content': 7.336668204516172e-05, 'timestamp': '2025-09-30 23:12:01.149234', 'step': 6976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.219469', 'step': 6976, 'epoch': 3} {'type': 'loss', 'content': 0.00014655393897555768, 'timestamp': '2025-09-30 23:12:01.222616', 'step': 6977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.288528', 'step': 6977, 'epoch': 3} {'type': 'loss', 'content': 8.385050023207441e-05, 'timestamp': '2025-09-30 23:12:01.291473', 'step': 6978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.357149', 'step': 6978, 'epoch': 3} {'type': 'loss', 'content': 2.4567823857069016e-05, 'timestamp': '2025-09-30 23:12:01.366318', 'step': 6979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:01.430313', 'step': 6979, 'epoch': 3} {'type': 'loss', 'content': 0.001009592437185347, 'timestamp': '2025-09-30 23:12:01.444067', 'step': 6980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.508224', 'step': 6980, 'epoch': 3} {'type': 'loss', 'content': 0.0002619076694827527, 'timestamp': '2025-09-30 23:12:01.510955', 'step': 6981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.579893', 'step': 6981, 'epoch': 3} {'type': 'loss', 'content': 0.010405824519693851, 'timestamp': '2025-09-30 23:12:01.587137', 'step': 6982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.651850', 'step': 6982, 'epoch': 3} {'type': 'loss', 'content': 0.00010209825995843858, 'timestamp': '2025-09-30 23:12:01.655828', 'step': 6983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.723028', 'step': 6983, 'epoch': 3} {'type': 'loss', 'content': 0.0018726277630776167, 'timestamp': '2025-09-30 23:12:01.730359', 'step': 6984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:01.797407', 'step': 6984, 'epoch': 3} {'type': 'loss', 'content': 0.0007800746825523674, 'timestamp': '2025-09-30 23:12:01.803872', 'step': 6985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.864040', 'step': 6985, 'epoch': 3} {'type': 'loss', 'content': 0.001516220043413341, 'timestamp': '2025-09-30 23:12:01.872179', 'step': 6986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:01.940073', 'step': 6986, 'epoch': 3} {'type': 'loss', 'content': 8.219679148169234e-05, 'timestamp': '2025-09-30 23:12:01.946753', 'step': 6987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:02.014555', 'step': 6987, 'epoch': 3} {'type': 'loss', 'content': 0.001059852889738977, 'timestamp': '2025-09-30 23:12:02.021498', 'step': 6988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:02.088409', 'step': 6988, 'epoch': 3} {'type': 'loss', 'content': 6.528243829961866e-05, 'timestamp': '2025-09-30 23:12:02.095881', 'step': 6989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:02.156540', 'step': 6989, 'epoch': 3} {'type': 'loss', 'content': 0.001266120350919664, 'timestamp': '2025-09-30 23:12:02.163898', 'step': 6990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:02.243012', 'step': 6990, 'epoch': 3} {'type': 'loss', 'content': 7.314236427191645e-05, 'timestamp': '2025-09-30 23:12:02.248822', 'step': 6991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:02.315860', 'step': 6991, 'epoch': 3} {'type': 'loss', 'content': 0.028391767293214798, 'timestamp': '2025-09-30 23:12:02.322169', 'step': 6992, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:12:07.500057', 'step': 6992, 'epoch': 3} {'type': 'pplx', 'content': 6979420.469487675, 'timestamp': '2025-09-30 23:12:07.512308', 'step': 6992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:07.575636', 'step': 6992, 'epoch': 3} {'type': 'loss', 'content': 0.00819263607263565, 'timestamp': '2025-09-30 23:12:07.578879', 'step': 6993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:07.659293', 'step': 6993, 'epoch': 3} {'type': 'loss', 'content': 5.919569593970664e-05, 'timestamp': '2025-09-30 23:12:07.662975', 'step': 6994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:07.724632', 'step': 6994, 'epoch': 3} {'type': 'loss', 'content': 0.0004080995568074286, 'timestamp': '2025-09-30 23:12:07.728092', 'step': 6995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:07.814068', 'step': 6995, 'epoch': 3} {'type': 'loss', 'content': 0.001490383525379002, 'timestamp': '2025-09-30 23:12:07.822164', 'step': 6996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:07.888632', 'step': 6996, 'epoch': 3} {'type': 'loss', 'content': 0.0002957914548460394, 'timestamp': '2025-09-30 23:12:07.903373', 'step': 6997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:07.973323', 'step': 6997, 'epoch': 3} {'type': 'loss', 'content': 2.4567105356254615e-05, 'timestamp': '2025-09-30 23:12:07.983827', 'step': 6998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:08.059358', 'step': 6998, 'epoch': 3} {'type': 'loss', 'content': 0.01570468582212925, 'timestamp': '2025-09-30 23:12:08.063795', 'step': 6999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:08.145033', 'step': 6999, 'epoch': 3} {'type': 'loss', 'content': 0.00016282647266052663, 'timestamp': '2025-09-30 23:12:08.159028', 'step': 7000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-30 23:12:08.649126', 'step': 7000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:08.761514', 'step': 7000, 'epoch': 3} {'type': 'loss', 'content': 0.0008282113703899086, 'timestamp': '2025-09-30 23:12:08.779950', 'step': 7001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:08.875386', 'step': 7001, 'epoch': 3} {'type': 'loss', 'content': 7.132587052183226e-05, 'timestamp': '2025-09-30 23:12:08.878954', 'step': 7002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:08.984296', 'step': 7002, 'epoch': 3} {'type': 'loss', 'content': 4.559259832603857e-05, 'timestamp': '2025-09-30 23:12:09.001626', 'step': 7003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:09.083539', 'step': 7003, 'epoch': 3} {'type': 'loss', 'content': 6.487327482318506e-05, 'timestamp': '2025-09-30 23:12:09.103091', 'step': 7004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.172263', 'step': 7004, 'epoch': 3} {'type': 'loss', 'content': 0.022435391321778297, 'timestamp': '2025-09-30 23:12:09.186726', 'step': 7005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:09.283066', 'step': 7005, 'epoch': 3} {'type': 'loss', 'content': 1.7696627764962614e-05, 'timestamp': '2025-09-30 23:12:09.295623', 'step': 7006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.385233', 'step': 7006, 'epoch': 3} {'type': 'loss', 'content': 0.04526594653725624, 'timestamp': '2025-09-30 23:12:09.395906', 'step': 7007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:09.468473', 'step': 7007, 'epoch': 3} {'type': 'loss', 'content': 0.0006925383349880576, 'timestamp': '2025-09-30 23:12:09.484343', 'step': 7008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.549658', 'step': 7008, 'epoch': 3} {'type': 'loss', 'content': 2.0685514755314216e-05, 'timestamp': '2025-09-30 23:12:09.560502', 'step': 7009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.652635', 'step': 7009, 'epoch': 3} {'type': 'loss', 'content': 0.00016348413191735744, 'timestamp': '2025-09-30 23:12:09.662229', 'step': 7010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.743674', 'step': 7010, 'epoch': 3} {'type': 'loss', 'content': 0.0004572182078845799, 'timestamp': '2025-09-30 23:12:09.752258', 'step': 7011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.826934', 'step': 7011, 'epoch': 3} {'type': 'loss', 'content': 0.0013813200639560819, 'timestamp': '2025-09-30 23:12:09.834153', 'step': 7012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.898796', 'step': 7012, 'epoch': 3} {'type': 'loss', 'content': 3.4963850339408964e-05, 'timestamp': '2025-09-30 23:12:09.902802', 'step': 7013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:09.967675', 'step': 7013, 'epoch': 3} {'type': 'loss', 'content': 0.014531193301081657, 'timestamp': '2025-09-30 23:12:09.972070', 'step': 7014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:10.039283', 'step': 7014, 'epoch': 3} {'type': 'loss', 'content': 0.0017022559186443686, 'timestamp': '2025-09-30 23:12:10.043181', 'step': 7015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.113314', 'step': 7015, 'epoch': 3} {'type': 'loss', 'content': 0.004896983038634062, 'timestamp': '2025-09-30 23:12:10.121833', 'step': 7016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.205778', 'step': 7016, 'epoch': 3} {'type': 'loss', 'content': 0.0001089979414246045, 'timestamp': '2025-09-30 23:12:10.223550', 'step': 7017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.351996', 'step': 7017, 'epoch': 3} {'type': 'loss', 'content': 0.0003261819074396044, 'timestamp': '2025-09-30 23:12:10.357702', 'step': 7018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.440442', 'step': 7018, 'epoch': 3} {'type': 'loss', 'content': 0.00494782580062747, 'timestamp': '2025-09-30 23:12:10.459327', 'step': 7019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.567163', 'step': 7019, 'epoch': 3} {'type': 'loss', 'content': 0.0006498059956356883, 'timestamp': '2025-09-30 23:12:10.575765', 'step': 7020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.658287', 'step': 7020, 'epoch': 3} {'type': 'loss', 'content': 0.00012203152436995879, 'timestamp': '2025-09-30 23:12:10.662975', 'step': 7021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.737605', 'step': 7021, 'epoch': 3} {'type': 'loss', 'content': 0.0034956536255776882, 'timestamp': '2025-09-30 23:12:10.741605', 'step': 7022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:10.809827', 'step': 7022, 'epoch': 3} {'type': 'loss', 'content': 0.0011860233498737216, 'timestamp': '2025-09-30 23:12:10.821034', 'step': 7023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.889160', 'step': 7023, 'epoch': 3} {'type': 'loss', 'content': 0.00048351145233027637, 'timestamp': '2025-09-30 23:12:10.896880', 'step': 7024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:10.965646', 'step': 7024, 'epoch': 3} {'type': 'loss', 'content': 0.0029507868457585573, 'timestamp': '2025-09-30 23:12:10.970837', 'step': 7025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.040717', 'step': 7025, 'epoch': 3} {'type': 'loss', 'content': 0.0025146938860416412, 'timestamp': '2025-09-30 23:12:11.043475', 'step': 7026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.100406', 'step': 7026, 'epoch': 3} {'type': 'loss', 'content': 0.05284295603632927, 'timestamp': '2025-09-30 23:12:11.103281', 'step': 7027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.169794', 'step': 7027, 'epoch': 3} {'type': 'loss', 'content': 6.954454147489741e-05, 'timestamp': '2025-09-30 23:12:11.181210', 'step': 7028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.255513', 'step': 7028, 'epoch': 3} {'type': 'loss', 'content': 0.0005827491404488683, 'timestamp': '2025-09-30 23:12:11.258828', 'step': 7029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.320752', 'step': 7029, 'epoch': 3} {'type': 'loss', 'content': 0.00019449398678261787, 'timestamp': '2025-09-30 23:12:11.326622', 'step': 7030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.382220', 'step': 7030, 'epoch': 3} {'type': 'loss', 'content': 0.0008276646840386093, 'timestamp': '2025-09-30 23:12:11.385836', 'step': 7031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.444318', 'step': 7031, 'epoch': 3} {'type': 'loss', 'content': 0.0005132805090397596, 'timestamp': '2025-09-30 23:12:11.450953', 'step': 7032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.517033', 'step': 7032, 'epoch': 3} {'type': 'loss', 'content': 0.016258232295513153, 'timestamp': '2025-09-30 23:12:11.522125', 'step': 7033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.578462', 'step': 7033, 'epoch': 3} {'type': 'loss', 'content': 5.728072937927209e-05, 'timestamp': '2025-09-30 23:12:11.587850', 'step': 7034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.661939', 'step': 7034, 'epoch': 3} {'type': 'loss', 'content': 0.00011313369759591296, 'timestamp': '2025-09-30 23:12:11.665401', 'step': 7035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.724556', 'step': 7035, 'epoch': 3} {'type': 'loss', 'content': 1.7256232240470126e-05, 'timestamp': '2025-09-30 23:12:11.733424', 'step': 7036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.811634', 'step': 7036, 'epoch': 3} {'type': 'loss', 'content': 0.004682761617004871, 'timestamp': '2025-09-30 23:12:11.819893', 'step': 7037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.886263', 'step': 7037, 'epoch': 3} {'type': 'loss', 'content': 0.001250697998329997, 'timestamp': '2025-09-30 23:12:11.889617', 'step': 7038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:11.964068', 'step': 7038, 'epoch': 3} {'type': 'loss', 'content': 0.0066428231075406075, 'timestamp': '2025-09-30 23:12:11.970713', 'step': 7039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.046249', 'step': 7039, 'epoch': 3} {'type': 'loss', 'content': 0.00016524696548003703, 'timestamp': '2025-09-30 23:12:12.054967', 'step': 7040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.107717', 'step': 7040, 'epoch': 3} {'type': 'loss', 'content': 0.0017372036818414927, 'timestamp': '2025-09-30 23:12:12.110097', 'step': 7041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.163659', 'step': 7041, 'epoch': 3} {'type': 'loss', 'content': 0.0015227275434881449, 'timestamp': '2025-09-30 23:12:12.181099', 'step': 7042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.269125', 'step': 7042, 'epoch': 3} {'type': 'loss', 'content': 7.681317947572097e-05, 'timestamp': '2025-09-30 23:12:12.274603', 'step': 7043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.343728', 'step': 7043, 'epoch': 3} {'type': 'loss', 'content': 0.0004749283252749592, 'timestamp': '2025-09-30 23:12:12.354788', 'step': 7044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.422261', 'step': 7044, 'epoch': 3} {'type': 'loss', 'content': 0.0002698084863368422, 'timestamp': '2025-09-30 23:12:12.429325', 'step': 7045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:12.497462', 'step': 7045, 'epoch': 3} {'type': 'loss', 'content': 0.0162531565874815, 'timestamp': '2025-09-30 23:12:12.500496', 'step': 7046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.563621', 'step': 7046, 'epoch': 3} {'type': 'loss', 'content': 0.005846288055181503, 'timestamp': '2025-09-30 23:12:12.570073', 'step': 7047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.642398', 'step': 7047, 'epoch': 3} {'type': 'loss', 'content': 7.11958491592668e-05, 'timestamp': '2025-09-30 23:12:12.649859', 'step': 7048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:12.716398', 'step': 7048, 'epoch': 3} {'type': 'loss', 'content': 7.091678708093241e-05, 'timestamp': '2025-09-30 23:12:12.721427', 'step': 7049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.780017', 'step': 7049, 'epoch': 3} {'type': 'loss', 'content': 0.003943169489502907, 'timestamp': '2025-09-30 23:12:12.782719', 'step': 7050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.846862', 'step': 7050, 'epoch': 3} {'type': 'loss', 'content': 0.0009062481112778187, 'timestamp': '2025-09-30 23:12:12.856000', 'step': 7051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.925661', 'step': 7051, 'epoch': 3} {'type': 'loss', 'content': 6.353572098305449e-05, 'timestamp': '2025-09-30 23:12:12.932824', 'step': 7052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:12.989372', 'step': 7052, 'epoch': 3} {'type': 'loss', 'content': 0.00011400311632314697, 'timestamp': '2025-09-30 23:12:12.996128', 'step': 7053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.059419', 'step': 7053, 'epoch': 3} {'type': 'loss', 'content': 0.04957105591893196, 'timestamp': '2025-09-30 23:12:13.067563', 'step': 7054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:13.138556', 'step': 7054, 'epoch': 3} {'type': 'loss', 'content': 0.00021971848036628217, 'timestamp': '2025-09-30 23:12:13.146156', 'step': 7055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.213271', 'step': 7055, 'epoch': 3} {'type': 'loss', 'content': 8.741393685340881e-05, 'timestamp': '2025-09-30 23:12:13.223570', 'step': 7056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.291822', 'step': 7056, 'epoch': 3} {'type': 'loss', 'content': 0.0025500531774014235, 'timestamp': '2025-09-30 23:12:13.300545', 'step': 7057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.377745', 'step': 7057, 'epoch': 3} {'type': 'loss', 'content': 5.336891263141297e-05, 'timestamp': '2025-09-30 23:12:13.381042', 'step': 7058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:13.448355', 'step': 7058, 'epoch': 3} {'type': 'loss', 'content': 0.003792239585891366, 'timestamp': '2025-09-30 23:12:13.451187', 'step': 7059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.516155', 'step': 7059, 'epoch': 3} {'type': 'loss', 'content': 8.516612433595583e-05, 'timestamp': '2025-09-30 23:12:13.528693', 'step': 7060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.590466', 'step': 7060, 'epoch': 3} {'type': 'loss', 'content': 0.001811456517316401, 'timestamp': '2025-09-30 23:12:13.600198', 'step': 7061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.663663', 'step': 7061, 'epoch': 3} {'type': 'loss', 'content': 0.0014370096614584327, 'timestamp': '2025-09-30 23:12:13.667257', 'step': 7062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.741352', 'step': 7062, 'epoch': 3} {'type': 'loss', 'content': 0.00015725087723694742, 'timestamp': '2025-09-30 23:12:13.749765', 'step': 7063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.816720', 'step': 7063, 'epoch': 3} {'type': 'loss', 'content': 0.0019836951978504658, 'timestamp': '2025-09-30 23:12:13.827315', 'step': 7064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.892340', 'step': 7064, 'epoch': 3} {'type': 'loss', 'content': 0.0003616230678744614, 'timestamp': '2025-09-30 23:12:13.898266', 'step': 7065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:13.960647', 'step': 7065, 'epoch': 3} {'type': 'loss', 'content': 0.003149342257529497, 'timestamp': '2025-09-30 23:12:13.963967', 'step': 7066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.029678', 'step': 7066, 'epoch': 3} {'type': 'loss', 'content': 9.209175186697394e-05, 'timestamp': '2025-09-30 23:12:14.035537', 'step': 7067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:14.100767', 'step': 7067, 'epoch': 3} {'type': 'loss', 'content': 0.0005514411022886634, 'timestamp': '2025-09-30 23:12:14.109845', 'step': 7068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:14.172666', 'step': 7068, 'epoch': 3} {'type': 'loss', 'content': 0.0005808740970678627, 'timestamp': '2025-09-30 23:12:14.175643', 'step': 7069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:14.244727', 'step': 7069, 'epoch': 3} {'type': 'loss', 'content': 0.0002013353951042518, 'timestamp': '2025-09-30 23:12:14.261545', 'step': 7070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.327278', 'step': 7070, 'epoch': 3} {'type': 'loss', 'content': 0.0020204000174999237, 'timestamp': '2025-09-30 23:12:14.335839', 'step': 7071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.407175', 'step': 7071, 'epoch': 3} {'type': 'loss', 'content': 0.005822985898703337, 'timestamp': '2025-09-30 23:12:14.415331', 'step': 7072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.477667', 'step': 7072, 'epoch': 3} {'type': 'loss', 'content': 0.004319666884839535, 'timestamp': '2025-09-30 23:12:14.482656', 'step': 7073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.545510', 'step': 7073, 'epoch': 3} {'type': 'loss', 'content': 0.0007446152740158141, 'timestamp': '2025-09-30 23:12:14.556323', 'step': 7074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.625118', 'step': 7074, 'epoch': 3} {'type': 'loss', 'content': 0.0015300174709409475, 'timestamp': '2025-09-30 23:12:14.630011', 'step': 7075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:14.691058', 'step': 7075, 'epoch': 3} {'type': 'loss', 'content': 0.013853175565600395, 'timestamp': '2025-09-30 23:12:14.707971', 'step': 7076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.810016', 'step': 7076, 'epoch': 3} {'type': 'loss', 'content': 0.0008018166408874094, 'timestamp': '2025-09-30 23:12:14.829581', 'step': 7077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.891490', 'step': 7077, 'epoch': 3} {'type': 'loss', 'content': 0.03957380726933479, 'timestamp': '2025-09-30 23:12:14.896890', 'step': 7078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:14.990632', 'step': 7078, 'epoch': 3} {'type': 'loss', 'content': 0.00010325007315259427, 'timestamp': '2025-09-30 23:12:14.993828', 'step': 7079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:15.076483', 'step': 7079, 'epoch': 3} {'type': 'loss', 'content': 0.0013402601471170783, 'timestamp': '2025-09-30 23:12:15.088743', 'step': 7080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:15.153529', 'step': 7080, 'epoch': 3} {'type': 'loss', 'content': 0.0007546384586021304, 'timestamp': '2025-09-30 23:12:15.157318', 'step': 7081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.219573', 'step': 7081, 'epoch': 3} {'type': 'loss', 'content': 0.00021466512407641858, 'timestamp': '2025-09-30 23:12:15.222061', 'step': 7082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.275324', 'step': 7082, 'epoch': 3} {'type': 'loss', 'content': 4.379873280413449e-05, 'timestamp': '2025-09-30 23:12:15.277830', 'step': 7083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.333891', 'step': 7083, 'epoch': 3} {'type': 'loss', 'content': 0.002458480652421713, 'timestamp': '2025-09-30 23:12:15.342283', 'step': 7084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.397793', 'step': 7084, 'epoch': 3} {'type': 'loss', 'content': 0.0003393541555851698, 'timestamp': '2025-09-30 23:12:15.400182', 'step': 7085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.454924', 'step': 7085, 'epoch': 3} {'type': 'loss', 'content': 0.0012434069067239761, 'timestamp': '2025-09-30 23:12:15.458711', 'step': 7086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.521425', 'step': 7086, 'epoch': 3} {'type': 'loss', 'content': 0.04260122403502464, 'timestamp': '2025-09-30 23:12:15.530636', 'step': 7087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.602542', 'step': 7087, 'epoch': 3} {'type': 'loss', 'content': 0.001220524194650352, 'timestamp': '2025-09-30 23:12:15.616770', 'step': 7088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:15.683993', 'step': 7088, 'epoch': 3} {'type': 'loss', 'content': 4.960131263942458e-05, 'timestamp': '2025-09-30 23:12:15.688535', 'step': 7089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.766335', 'step': 7089, 'epoch': 3} {'type': 'loss', 'content': 0.00031859680893830955, 'timestamp': '2025-09-30 23:12:15.769396', 'step': 7090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:15.844634', 'step': 7090, 'epoch': 3} {'type': 'loss', 'content': 0.0024516696576029062, 'timestamp': '2025-09-30 23:12:15.850210', 'step': 7091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:15.910783', 'step': 7091, 'epoch': 3} {'type': 'loss', 'content': 4.411844565765932e-05, 'timestamp': '2025-09-30 23:12:15.920274', 'step': 7092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:15.977182', 'step': 7092, 'epoch': 3} {'type': 'loss', 'content': 0.0003045520279556513, 'timestamp': '2025-09-30 23:12:15.979415', 'step': 7093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.034733', 'step': 7093, 'epoch': 3} {'type': 'loss', 'content': 5.9340643929317594e-05, 'timestamp': '2025-09-30 23:12:16.037016', 'step': 7094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.089826', 'step': 7094, 'epoch': 3} {'type': 'loss', 'content': 9.47148073464632e-05, 'timestamp': '2025-09-30 23:12:16.093484', 'step': 7095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.152799', 'step': 7095, 'epoch': 3} {'type': 'loss', 'content': 1.615596738702152e-05, 'timestamp': '2025-09-30 23:12:16.161504', 'step': 7096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.224092', 'step': 7096, 'epoch': 3} {'type': 'loss', 'content': 2.0333705833763815e-05, 'timestamp': '2025-09-30 23:12:16.227991', 'step': 7097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.304036', 'step': 7097, 'epoch': 3} {'type': 'loss', 'content': 0.0007240799022838473, 'timestamp': '2025-09-30 23:12:16.311600', 'step': 7098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.372798', 'step': 7098, 'epoch': 3} {'type': 'loss', 'content': 1.469068320147926e-05, 'timestamp': '2025-09-30 23:12:16.382444', 'step': 7099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.449936', 'step': 7099, 'epoch': 3} {'type': 'loss', 'content': 0.00033721746876835823, 'timestamp': '2025-09-30 23:12:16.465361', 'step': 7100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.521801', 'step': 7100, 'epoch': 3} {'type': 'loss', 'content': 0.00010882804781431332, 'timestamp': '2025-09-30 23:12:16.529982', 'step': 7101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.604540', 'step': 7101, 'epoch': 3} {'type': 'loss', 'content': 0.00062426773365587, 'timestamp': '2025-09-30 23:12:16.614181', 'step': 7102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.687896', 'step': 7102, 'epoch': 3} {'type': 'loss', 'content': 0.00011061070108553395, 'timestamp': '2025-09-30 23:12:16.692940', 'step': 7103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.752359', 'step': 7103, 'epoch': 3} {'type': 'loss', 'content': 0.0002248767705168575, 'timestamp': '2025-09-30 23:12:16.764377', 'step': 7104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.838148', 'step': 7104, 'epoch': 3} {'type': 'loss', 'content': 9.08935398911126e-05, 'timestamp': '2025-09-30 23:12:16.847149', 'step': 7105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.922133', 'step': 7105, 'epoch': 3} {'type': 'loss', 'content': 0.00047911269939504564, 'timestamp': '2025-09-30 23:12:16.927393', 'step': 7106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:16.990081', 'step': 7106, 'epoch': 3} {'type': 'loss', 'content': 0.006309490650892258, 'timestamp': '2025-09-30 23:12:17.000060', 'step': 7107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.068152', 'step': 7107, 'epoch': 3} {'type': 'loss', 'content': 0.01530434750020504, 'timestamp': '2025-09-30 23:12:17.083669', 'step': 7108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.144597', 'step': 7108, 'epoch': 3} {'type': 'loss', 'content': 0.00011174430983373895, 'timestamp': '2025-09-30 23:12:17.162342', 'step': 7109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.272301', 'step': 7109, 'epoch': 3} {'type': 'loss', 'content': 0.0010279153939336538, 'timestamp': '2025-09-30 23:12:17.277478', 'step': 7110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.341192', 'step': 7110, 'epoch': 3} {'type': 'loss', 'content': 6.628689880017191e-05, 'timestamp': '2025-09-30 23:12:17.348993', 'step': 7111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.418898', 'step': 7111, 'epoch': 3} {'type': 'loss', 'content': 0.00019010851974599063, 'timestamp': '2025-09-30 23:12:17.430115', 'step': 7112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.494542', 'step': 7112, 'epoch': 3} {'type': 'loss', 'content': 0.0018038692651316524, 'timestamp': '2025-09-30 23:12:17.502039', 'step': 7113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.572231', 'step': 7113, 'epoch': 3} {'type': 'loss', 'content': 0.0009045050828717649, 'timestamp': '2025-09-30 23:12:17.578463', 'step': 7114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.640677', 'step': 7114, 'epoch': 3} {'type': 'loss', 'content': 0.008898362517356873, 'timestamp': '2025-09-30 23:12:17.648980', 'step': 7115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.716143', 'step': 7115, 'epoch': 3} {'type': 'loss', 'content': 0.00021710015425924212, 'timestamp': '2025-09-30 23:12:17.726765', 'step': 7116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.802667', 'step': 7116, 'epoch': 3} {'type': 'loss', 'content': 0.0005628464859910309, 'timestamp': '2025-09-30 23:12:17.810570', 'step': 7117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.867963', 'step': 7117, 'epoch': 3} {'type': 'loss', 'content': 0.014421123079955578, 'timestamp': '2025-09-30 23:12:17.870790', 'step': 7118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:17.938089', 'step': 7118, 'epoch': 3} {'type': 'loss', 'content': 0.022952262312173843, 'timestamp': '2025-09-30 23:12:17.940362', 'step': 7119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:18.008484', 'step': 7119, 'epoch': 3} {'type': 'loss', 'content': 0.011015989817678928, 'timestamp': '2025-09-30 23:12:18.019990', 'step': 7120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:18.084714', 'step': 7120, 'epoch': 3} {'type': 'loss', 'content': 0.020288558676838875, 'timestamp': '2025-09-30 23:12:18.093417', 'step': 7121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:18.157929', 'step': 7121, 'epoch': 3} {'type': 'loss', 'content': 3.4051852708216757e-05, 'timestamp': '2025-09-30 23:12:18.161818', 'step': 7122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:18.225374', 'step': 7122, 'epoch': 3} {'type': 'loss', 'content': 0.004483658354729414, 'timestamp': '2025-09-30 23:12:18.228888', 'step': 7123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:18.295075', 'step': 7123, 'epoch': 3} {'type': 'loss', 'content': 0.00016903803043533117, 'timestamp': '2025-09-30 23:12:18.305933', 'step': 7124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:18.386693', 'step': 7124, 'epoch': 3} {'type': 'loss', 'content': 0.026187879964709282, 'timestamp': '2025-09-30 23:12:18.397421', 'step': 7125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:18.460830', 'step': 7125, 'epoch': 3} {'type': 'loss', 'content': 0.005968824494630098, 'timestamp': '2025-09-30 23:12:18.469295', 'step': 7126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:18.547016', 'step': 7126, 'epoch': 3} {'type': 'loss', 'content': 0.00027626767405308783, 'timestamp': '2025-09-30 23:12:18.557893', 'step': 7127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:18.636775', 'step': 7127, 'epoch': 3} {'type': 'loss', 'content': 0.0003822748258244246, 'timestamp': '2025-09-30 23:12:18.643820', 'step': 7128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:18.722838', 'step': 7128, 'epoch': 3} {'type': 'loss', 'content': 0.0007325946353375912, 'timestamp': '2025-09-30 23:12:18.731435', 'step': 7129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:18.811556', 'step': 7129, 'epoch': 3} {'type': 'loss', 'content': 0.00863740872591734, 'timestamp': '2025-09-30 23:12:18.821876', 'step': 7130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:18.900695', 'step': 7130, 'epoch': 3} {'type': 'loss', 'content': 0.00046719584497623146, 'timestamp': '2025-09-30 23:12:18.909483', 'step': 7131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:18.992755', 'step': 7131, 'epoch': 3} {'type': 'loss', 'content': 0.00033824375714175403, 'timestamp': '2025-09-30 23:12:19.006087', 'step': 7132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.081482', 'step': 7132, 'epoch': 3} {'type': 'loss', 'content': 0.0001373329432681203, 'timestamp': '2025-09-30 23:12:19.092265', 'step': 7133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.166059', 'step': 7133, 'epoch': 3} {'type': 'loss', 'content': 0.07372452318668365, 'timestamp': '2025-09-30 23:12:19.174416', 'step': 7134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.246652', 'step': 7134, 'epoch': 3} {'type': 'loss', 'content': 0.0013372370740398765, 'timestamp': '2025-09-30 23:12:19.255041', 'step': 7135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:19.337297', 'step': 7135, 'epoch': 3} {'type': 'loss', 'content': 0.0056486232206225395, 'timestamp': '2025-09-30 23:12:19.348848', 'step': 7136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:19.419639', 'step': 7136, 'epoch': 3} {'type': 'loss', 'content': 0.00016028460231609643, 'timestamp': '2025-09-30 23:12:19.430641', 'step': 7137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.506852', 'step': 7137, 'epoch': 3} {'type': 'loss', 'content': 0.00168402842245996, 'timestamp': '2025-09-30 23:12:19.515026', 'step': 7138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:19.592818', 'step': 7138, 'epoch': 3} {'type': 'loss', 'content': 0.017636168748140335, 'timestamp': '2025-09-30 23:12:19.599651', 'step': 7139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.657754', 'step': 7139, 'epoch': 3} {'type': 'loss', 'content': 0.0005899281240999699, 'timestamp': '2025-09-30 23:12:19.670287', 'step': 7140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.757152', 'step': 7140, 'epoch': 3} {'type': 'loss', 'content': 0.002494461601600051, 'timestamp': '2025-09-30 23:12:19.769236', 'step': 7141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.839131', 'step': 7141, 'epoch': 3} {'type': 'loss', 'content': 0.051229286938905716, 'timestamp': '2025-09-30 23:12:19.842948', 'step': 7142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:19.923920', 'step': 7142, 'epoch': 3} {'type': 'loss', 'content': 0.0002826227282639593, 'timestamp': '2025-09-30 23:12:19.932846', 'step': 7143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:20.009299', 'step': 7143, 'epoch': 3} {'type': 'loss', 'content': 0.004258168861269951, 'timestamp': '2025-09-30 23:12:20.023948', 'step': 7144, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:12:25.886659', 'step': 7144, 'epoch': 3} {'type': 'pplx', 'content': 7256175.310244441, 'timestamp': '2025-09-30 23:12:25.892167', 'step': 7144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:25.954647', 'step': 7144, 'epoch': 3} {'type': 'loss', 'content': 0.00264148972928524, 'timestamp': '2025-09-30 23:12:25.959737', 'step': 7145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:26.023274', 'step': 7145, 'epoch': 3} {'type': 'loss', 'content': 0.00015830919437576085, 'timestamp': '2025-09-30 23:12:26.027537', 'step': 7146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:26.086543', 'step': 7146, 'epoch': 3} {'type': 'loss', 'content': 0.04188466817140579, 'timestamp': '2025-09-30 23:12:26.089249', 'step': 7147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.146783', 'step': 7147, 'epoch': 3} {'type': 'loss', 'content': 0.001967097632586956, 'timestamp': '2025-09-30 23:12:26.153339', 'step': 7148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.211593', 'step': 7148, 'epoch': 3} {'type': 'loss', 'content': 0.0009212433942593634, 'timestamp': '2025-09-30 23:12:26.216843', 'step': 7149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.276709', 'step': 7149, 'epoch': 3} {'type': 'loss', 'content': 0.008065203204751015, 'timestamp': '2025-09-30 23:12:26.279714', 'step': 7150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.338781', 'step': 7150, 'epoch': 3} {'type': 'loss', 'content': 0.0011306308442726731, 'timestamp': '2025-09-30 23:12:26.341391', 'step': 7151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.419710', 'step': 7151, 'epoch': 3} {'type': 'loss', 'content': 0.001382448012009263, 'timestamp': '2025-09-30 23:12:26.430135', 'step': 7152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.494622', 'step': 7152, 'epoch': 3} {'type': 'loss', 'content': 0.0007524380343966186, 'timestamp': '2025-09-30 23:12:26.501595', 'step': 7153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.573518', 'step': 7153, 'epoch': 3} {'type': 'loss', 'content': 0.008512438274919987, 'timestamp': '2025-09-30 23:12:26.582785', 'step': 7154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.668077', 'step': 7154, 'epoch': 3} {'type': 'loss', 'content': 0.00020534437499009073, 'timestamp': '2025-09-30 23:12:26.681744', 'step': 7155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.781127', 'step': 7155, 'epoch': 3} {'type': 'loss', 'content': 0.00032191426726058125, 'timestamp': '2025-09-30 23:12:26.804261', 'step': 7156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:26.934460', 'step': 7156, 'epoch': 3} {'type': 'loss', 'content': 0.00013134365144651383, 'timestamp': '2025-09-30 23:12:26.940492', 'step': 7157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.037766', 'step': 7157, 'epoch': 3} {'type': 'loss', 'content': 0.0009045658516697586, 'timestamp': '2025-09-30 23:12:27.053382', 'step': 7158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.143464', 'step': 7158, 'epoch': 3} {'type': 'loss', 'content': 0.003480084938928485, 'timestamp': '2025-09-30 23:12:27.152783', 'step': 7159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.227764', 'step': 7159, 'epoch': 3} {'type': 'loss', 'content': 7.06419741618447e-05, 'timestamp': '2025-09-30 23:12:27.240714', 'step': 7160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:27.312821', 'step': 7160, 'epoch': 3} {'type': 'loss', 'content': 0.0010117681231349707, 'timestamp': '2025-09-30 23:12:27.319231', 'step': 7161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:27.394385', 'step': 7161, 'epoch': 3} {'type': 'loss', 'content': 0.03610571101307869, 'timestamp': '2025-09-30 23:12:27.400827', 'step': 7162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.463647', 'step': 7162, 'epoch': 3} {'type': 'loss', 'content': 0.000703250989317894, 'timestamp': '2025-09-30 23:12:27.468738', 'step': 7163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:12:27.538724', 'step': 7163, 'epoch': 3} {'type': 'loss', 'content': 0.0002775740285869688, 'timestamp': '2025-09-30 23:12:27.546820', 'step': 7164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.607250', 'step': 7164, 'epoch': 3} {'type': 'loss', 'content': 0.00010402569751022384, 'timestamp': '2025-09-30 23:12:27.613168', 'step': 7165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.673902', 'step': 7165, 'epoch': 3} {'type': 'loss', 'content': 0.0009889291832223535, 'timestamp': '2025-09-30 23:12:27.680098', 'step': 7166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.747199', 'step': 7166, 'epoch': 3} {'type': 'loss', 'content': 0.03934922814369202, 'timestamp': '2025-09-30 23:12:27.751696', 'step': 7167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.825380', 'step': 7167, 'epoch': 3} {'type': 'loss', 'content': 0.0012195607414469123, 'timestamp': '2025-09-30 23:12:27.834812', 'step': 7168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.902663', 'step': 7168, 'epoch': 3} {'type': 'loss', 'content': 0.002115086652338505, 'timestamp': '2025-09-30 23:12:27.910242', 'step': 7169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:27.979843', 'step': 7169, 'epoch': 3} {'type': 'loss', 'content': 9.882570884656161e-05, 'timestamp': '2025-09-30 23:12:27.988279', 'step': 7170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:28.072081', 'step': 7170, 'epoch': 3} {'type': 'loss', 'content': 0.0004894834710285068, 'timestamp': '2025-09-30 23:12:28.084001', 'step': 7171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.165717', 'step': 7171, 'epoch': 3} {'type': 'loss', 'content': 0.000791451777331531, 'timestamp': '2025-09-30 23:12:28.178930', 'step': 7172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.275195', 'step': 7172, 'epoch': 3} {'type': 'loss', 'content': 0.002284115878865123, 'timestamp': '2025-09-30 23:12:28.286350', 'step': 7173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.367603', 'step': 7173, 'epoch': 3} {'type': 'loss', 'content': 0.0005583027377724648, 'timestamp': '2025-09-30 23:12:28.371420', 'step': 7174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.428818', 'step': 7174, 'epoch': 3} {'type': 'loss', 'content': 0.0048610614612698555, 'timestamp': '2025-09-30 23:12:28.438261', 'step': 7175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.518868', 'step': 7175, 'epoch': 3} {'type': 'loss', 'content': 0.014646795578300953, 'timestamp': '2025-09-30 23:12:28.533418', 'step': 7176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.619914', 'step': 7176, 'epoch': 3} {'type': 'loss', 'content': 0.032737188041210175, 'timestamp': '2025-09-30 23:12:28.625528', 'step': 7177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.694736', 'step': 7177, 'epoch': 3} {'type': 'loss', 'content': 0.0001127649302361533, 'timestamp': '2025-09-30 23:12:28.704331', 'step': 7178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.788631', 'step': 7178, 'epoch': 3} {'type': 'loss', 'content': 0.0002379153302172199, 'timestamp': '2025-09-30 23:12:28.798781', 'step': 7179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:28.884295', 'step': 7179, 'epoch': 3} {'type': 'loss', 'content': 0.0008107406902126968, 'timestamp': '2025-09-30 23:12:28.903551', 'step': 7180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.004129', 'step': 7180, 'epoch': 3} {'type': 'loss', 'content': 0.0019656389486044645, 'timestamp': '2025-09-30 23:12:29.016288', 'step': 7181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.101505', 'step': 7181, 'epoch': 3} {'type': 'loss', 'content': 0.0010593356564640999, 'timestamp': '2025-09-30 23:12:29.113987', 'step': 7182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.196590', 'step': 7182, 'epoch': 3} {'type': 'loss', 'content': 0.0029793812427669764, 'timestamp': '2025-09-30 23:12:29.208608', 'step': 7183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.286362', 'step': 7183, 'epoch': 3} {'type': 'loss', 'content': 0.0013064276427030563, 'timestamp': '2025-09-30 23:12:29.300309', 'step': 7184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:29.375501', 'step': 7184, 'epoch': 3} {'type': 'loss', 'content': 0.022389857098460197, 'timestamp': '2025-09-30 23:12:29.390325', 'step': 7185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.477614', 'step': 7185, 'epoch': 3} {'type': 'loss', 'content': 0.001259625656530261, 'timestamp': '2025-09-30 23:12:29.486968', 'step': 7186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.569772', 'step': 7186, 'epoch': 3} {'type': 'loss', 'content': 0.00016105198301374912, 'timestamp': '2025-09-30 23:12:29.584822', 'step': 7187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:29.672392', 'step': 7187, 'epoch': 3} {'type': 'loss', 'content': 0.0037788297049701214, 'timestamp': '2025-09-30 23:12:29.687706', 'step': 7188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.773778', 'step': 7188, 'epoch': 3} {'type': 'loss', 'content': 0.013865814544260502, 'timestamp': '2025-09-30 23:12:29.786184', 'step': 7189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:29.874647', 'step': 7189, 'epoch': 3} {'type': 'loss', 'content': 4.686872489401139e-05, 'timestamp': '2025-09-30 23:12:29.879435', 'step': 7190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:29.936739', 'step': 7190, 'epoch': 3} {'type': 'loss', 'content': 0.00021029883646406233, 'timestamp': '2025-09-30 23:12:29.948996', 'step': 7191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.039097', 'step': 7191, 'epoch': 3} {'type': 'loss', 'content': 0.0007889016414992511, 'timestamp': '2025-09-30 23:12:30.057758', 'step': 7192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.143463', 'step': 7192, 'epoch': 3} {'type': 'loss', 'content': 0.0034821550361812115, 'timestamp': '2025-09-30 23:12:30.156166', 'step': 7193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.238873', 'step': 7193, 'epoch': 3} {'type': 'loss', 'content': 0.002171142026782036, 'timestamp': '2025-09-30 23:12:30.250971', 'step': 7194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.343314', 'step': 7194, 'epoch': 3} {'type': 'loss', 'content': 0.002668210072442889, 'timestamp': '2025-09-30 23:12:30.348509', 'step': 7195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.426217', 'step': 7195, 'epoch': 3} {'type': 'loss', 'content': 0.008484646677970886, 'timestamp': '2025-09-30 23:12:30.434173', 'step': 7196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.531006', 'step': 7196, 'epoch': 3} {'type': 'loss', 'content': 6.284169649006799e-05, 'timestamp': '2025-09-30 23:12:30.545742', 'step': 7197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.605181', 'step': 7197, 'epoch': 3} {'type': 'loss', 'content': 0.02593093179166317, 'timestamp': '2025-09-30 23:12:30.616326', 'step': 7198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.702728', 'step': 7198, 'epoch': 3} {'type': 'loss', 'content': 0.00112907646689564, 'timestamp': '2025-09-30 23:12:30.713788', 'step': 7199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.803876', 'step': 7199, 'epoch': 3} {'type': 'loss', 'content': 0.00010457821190357208, 'timestamp': '2025-09-30 23:12:30.820330', 'step': 7200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.913107', 'step': 7200, 'epoch': 3} {'type': 'loss', 'content': 0.00021009122428949922, 'timestamp': '2025-09-30 23:12:30.926044', 'step': 7201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:30.997825', 'step': 7201, 'epoch': 3} {'type': 'loss', 'content': 0.0014902731636539102, 'timestamp': '2025-09-30 23:12:31.008183', 'step': 7202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.093780', 'step': 7202, 'epoch': 3} {'type': 'loss', 'content': 0.0016974310856312513, 'timestamp': '2025-09-30 23:12:31.104556', 'step': 7203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:31.181095', 'step': 7203, 'epoch': 3} {'type': 'loss', 'content': 0.00029279751470312476, 'timestamp': '2025-09-30 23:12:31.196555', 'step': 7204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.280926', 'step': 7204, 'epoch': 3} {'type': 'loss', 'content': 0.0005877813673578203, 'timestamp': '2025-09-30 23:12:31.295194', 'step': 7205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.370037', 'step': 7205, 'epoch': 3} {'type': 'loss', 'content': 0.002224754774942994, 'timestamp': '2025-09-30 23:12:31.383960', 'step': 7206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:31.464749', 'step': 7206, 'epoch': 3} {'type': 'loss', 'content': 0.00020519782265182585, 'timestamp': '2025-09-30 23:12:31.473820', 'step': 7207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.565560', 'step': 7207, 'epoch': 3} {'type': 'loss', 'content': 0.004110101144760847, 'timestamp': '2025-09-30 23:12:31.572475', 'step': 7208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.658493', 'step': 7208, 'epoch': 3} {'type': 'loss', 'content': 0.00043097950401715934, 'timestamp': '2025-09-30 23:12:31.662671', 'step': 7209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.730787', 'step': 7209, 'epoch': 3} {'type': 'loss', 'content': 0.00010458566976012662, 'timestamp': '2025-09-30 23:12:31.743165', 'step': 7210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.832530', 'step': 7210, 'epoch': 3} {'type': 'loss', 'content': 0.007365785073488951, 'timestamp': '2025-09-30 23:12:31.844587', 'step': 7211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:31.935910', 'step': 7211, 'epoch': 3} {'type': 'loss', 'content': 0.01869054324924946, 'timestamp': '2025-09-30 23:12:31.952733', 'step': 7212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:32.067785', 'step': 7212, 'epoch': 3} {'type': 'loss', 'content': 0.00026410495047457516, 'timestamp': '2025-09-30 23:12:32.071292', 'step': 7213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:32.170221', 'step': 7213, 'epoch': 3} {'type': 'loss', 'content': 0.03954867273569107, 'timestamp': '2025-09-30 23:12:32.179226', 'step': 7214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:32.246820', 'step': 7214, 'epoch': 3} {'type': 'loss', 'content': 0.012096269056200981, 'timestamp': '2025-09-30 23:12:32.252692', 'step': 7215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:32.360804', 'step': 7215, 'epoch': 3} {'type': 'loss', 'content': 0.000397681025788188, 'timestamp': '2025-09-30 23:12:32.370281', 'step': 7216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:32.448629', 'step': 7216, 'epoch': 3} {'type': 'loss', 'content': 0.010080487467348576, 'timestamp': '2025-09-30 23:12:32.461460', 'step': 7217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:32.543374', 'step': 7217, 'epoch': 3} {'type': 'loss', 'content': 0.011653965339064598, 'timestamp': '2025-09-30 23:12:32.552217', 'step': 7218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:32.631004', 'step': 7218, 'epoch': 3} {'type': 'loss', 'content': 0.0005040091346018016, 'timestamp': '2025-09-30 23:12:32.636037', 'step': 7219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:32.711004', 'step': 7219, 'epoch': 3} {'type': 'loss', 'content': 0.033338699489831924, 'timestamp': '2025-09-30 23:12:32.718214', 'step': 7220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:32.791232', 'step': 7220, 'epoch': 3} {'type': 'loss', 'content': 0.015251257456839085, 'timestamp': '2025-09-30 23:12:32.802758', 'step': 7221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:32.893347', 'step': 7221, 'epoch': 3} {'type': 'loss', 'content': 0.009276451542973518, 'timestamp': '2025-09-30 23:12:32.903350', 'step': 7222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:32.991694', 'step': 7222, 'epoch': 3} {'type': 'loss', 'content': 0.007456892170011997, 'timestamp': '2025-09-30 23:12:33.001555', 'step': 7223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:33.086525', 'step': 7223, 'epoch': 3} {'type': 'loss', 'content': 0.0023246128112077713, 'timestamp': '2025-09-30 23:12:33.107470', 'step': 7224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:33.203922', 'step': 7224, 'epoch': 3} {'type': 'loss', 'content': 4.146420178585686e-05, 'timestamp': '2025-09-30 23:12:33.221340', 'step': 7225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 2560015608320.0}, 'timestamp': '2025-09-30 23:12:33.319836', 'step': 7225, 'epoch': 3} {'type': 'loss', 'content': 0.01918911375105381, 'timestamp': '2025-09-30 23:12:33.326128', 'step': 7226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:33.423979', 'step': 7226, 'epoch': 3} {'type': 'loss', 'content': 4.177308801445179e-05, 'timestamp': '2025-09-30 23:12:33.441983', 'step': 7227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:33.519574', 'step': 7227, 'epoch': 3} {'type': 'loss', 'content': 0.016531018540263176, 'timestamp': '2025-09-30 23:12:33.532592', 'step': 7228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:33.612069', 'step': 7228, 'epoch': 3} {'type': 'loss', 'content': 0.005578470416367054, 'timestamp': '2025-09-30 23:12:33.625603', 'step': 7229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:33.699712', 'step': 7229, 'epoch': 3} {'type': 'loss', 'content': 0.001119342283345759, 'timestamp': '2025-09-30 23:12:33.713440', 'step': 7230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:33.804069', 'step': 7230, 'epoch': 3} {'type': 'loss', 'content': 0.0021791872568428516, 'timestamp': '2025-09-30 23:12:33.816054', 'step': 7231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:33.899592', 'step': 7231, 'epoch': 3} {'type': 'loss', 'content': 0.0018980542663484812, 'timestamp': '2025-09-30 23:12:33.916752', 'step': 7232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:33.982721', 'step': 7232, 'epoch': 3} {'type': 'loss', 'content': 0.03531551733613014, 'timestamp': '2025-09-30 23:12:33.985979', 'step': 7233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.062761', 'step': 7233, 'epoch': 3} {'type': 'loss', 'content': 0.006886827759444714, 'timestamp': '2025-09-30 23:12:34.075079', 'step': 7234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.161473', 'step': 7234, 'epoch': 3} {'type': 'loss', 'content': 0.010435022413730621, 'timestamp': '2025-09-30 23:12:34.172709', 'step': 7235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.259417', 'step': 7235, 'epoch': 3} {'type': 'loss', 'content': 0.0005737237515859306, 'timestamp': '2025-09-30 23:12:34.278068', 'step': 7236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.370568', 'step': 7236, 'epoch': 3} {'type': 'loss', 'content': 0.002676040632650256, 'timestamp': '2025-09-30 23:12:34.381797', 'step': 7237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.468311', 'step': 7237, 'epoch': 3} {'type': 'loss', 'content': 0.0026921869721263647, 'timestamp': '2025-09-30 23:12:34.470703', 'step': 7238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.527074', 'step': 7238, 'epoch': 3} {'type': 'loss', 'content': 0.0007360400632023811, 'timestamp': '2025-09-30 23:12:34.530738', 'step': 7239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.615115', 'step': 7239, 'epoch': 3} {'type': 'loss', 'content': 0.0003677151689771563, 'timestamp': '2025-09-30 23:12:34.632501', 'step': 7240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.718702', 'step': 7240, 'epoch': 3} {'type': 'loss', 'content': 0.001907397760078311, 'timestamp': '2025-09-30 23:12:34.723015', 'step': 7241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.821617', 'step': 7241, 'epoch': 3} {'type': 'loss', 'content': 7.711071521043777e-05, 'timestamp': '2025-09-30 23:12:34.828650', 'step': 7242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:34.915161', 'step': 7242, 'epoch': 3} {'type': 'loss', 'content': 0.009810577146708965, 'timestamp': '2025-09-30 23:12:34.936460', 'step': 7243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.020549', 'step': 7243, 'epoch': 3} {'type': 'loss', 'content': 3.754612771444954e-05, 'timestamp': '2025-09-30 23:12:35.031997', 'step': 7244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.103516', 'step': 7244, 'epoch': 3} {'type': 'loss', 'content': 2.145974758605007e-05, 'timestamp': '2025-09-30 23:12:35.120454', 'step': 7245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:35.208844', 'step': 7245, 'epoch': 3} {'type': 'loss', 'content': 0.0030074953101575375, 'timestamp': '2025-09-30 23:12:35.220092', 'step': 7246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.305585', 'step': 7246, 'epoch': 3} {'type': 'loss', 'content': 0.0007495456375181675, 'timestamp': '2025-09-30 23:12:35.318143', 'step': 7247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.407631', 'step': 7247, 'epoch': 3} {'type': 'loss', 'content': 9.373825741931796e-05, 'timestamp': '2025-09-30 23:12:35.425231', 'step': 7248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:35.508584', 'step': 7248, 'epoch': 3} {'type': 'loss', 'content': 0.0043285381980240345, 'timestamp': '2025-09-30 23:12:35.523095', 'step': 7249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.607310', 'step': 7249, 'epoch': 3} {'type': 'loss', 'content': 0.00042431411566212773, 'timestamp': '2025-09-30 23:12:35.610914', 'step': 7250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.697359', 'step': 7250, 'epoch': 3} {'type': 'loss', 'content': 0.006202413234859705, 'timestamp': '2025-09-30 23:12:35.709886', 'step': 7251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.810281', 'step': 7251, 'epoch': 3} {'type': 'loss', 'content': 0.0009655666653998196, 'timestamp': '2025-09-30 23:12:35.825070', 'step': 7252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.910316', 'step': 7252, 'epoch': 3} {'type': 'loss', 'content': 0.016344280913472176, 'timestamp': '2025-09-30 23:12:35.918092', 'step': 7253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:35.975900', 'step': 7253, 'epoch': 3} {'type': 'loss', 'content': 2.2885973521624692e-05, 'timestamp': '2025-09-30 23:12:35.991547', 'step': 7254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:36.075199', 'step': 7254, 'epoch': 3} {'type': 'loss', 'content': 0.0024637081660330296, 'timestamp': '2025-09-30 23:12:36.090024', 'step': 7255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.180563', 'step': 7255, 'epoch': 3} {'type': 'loss', 'content': 7.351457315962762e-05, 'timestamp': '2025-09-30 23:12:36.198491', 'step': 7256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.282261', 'step': 7256, 'epoch': 3} {'type': 'loss', 'content': 4.190444087726064e-05, 'timestamp': '2025-09-30 23:12:36.293526', 'step': 7257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.369818', 'step': 7257, 'epoch': 3} {'type': 'loss', 'content': 2.6416159016662277e-05, 'timestamp': '2025-09-30 23:12:36.374139', 'step': 7258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:36.446538', 'step': 7258, 'epoch': 3} {'type': 'loss', 'content': 0.0005045945872552693, 'timestamp': '2025-09-30 23:12:36.457410', 'step': 7259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.549648', 'step': 7259, 'epoch': 3} {'type': 'loss', 'content': 6.459742144215852e-05, 'timestamp': '2025-09-30 23:12:36.560110', 'step': 7260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.629916', 'step': 7260, 'epoch': 3} {'type': 'loss', 'content': 0.038474079221487045, 'timestamp': '2025-09-30 23:12:36.638836', 'step': 7261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.704018', 'step': 7261, 'epoch': 3} {'type': 'loss', 'content': 0.008707980625331402, 'timestamp': '2025-09-30 23:12:36.715422', 'step': 7262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:36.805251', 'step': 7262, 'epoch': 3} {'type': 'loss', 'content': 0.00198001554235816, 'timestamp': '2025-09-30 23:12:36.818943', 'step': 7263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.905146', 'step': 7263, 'epoch': 3} {'type': 'loss', 'content': 0.0008599380380474031, 'timestamp': '2025-09-30 23:12:36.920928', 'step': 7264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:36.981665', 'step': 7264, 'epoch': 3} {'type': 'loss', 'content': 5.622773096547462e-05, 'timestamp': '2025-09-30 23:12:36.994317', 'step': 7265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.083095', 'step': 7265, 'epoch': 3} {'type': 'loss', 'content': 0.00035731607931666076, 'timestamp': '2025-09-30 23:12:37.095603', 'step': 7266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.183619', 'step': 7266, 'epoch': 3} {'type': 'loss', 'content': 0.0001741637388477102, 'timestamp': '2025-09-30 23:12:37.194589', 'step': 7267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.276493', 'step': 7267, 'epoch': 3} {'type': 'loss', 'content': 3.6087345506530255e-05, 'timestamp': '2025-09-30 23:12:37.292659', 'step': 7268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:37.371614', 'step': 7268, 'epoch': 3} {'type': 'loss', 'content': 0.00018839364929590374, 'timestamp': '2025-09-30 23:12:37.375306', 'step': 7269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.431991', 'step': 7269, 'epoch': 3} {'type': 'loss', 'content': 2.070057780656498e-05, 'timestamp': '2025-09-30 23:12:37.441855', 'step': 7270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.542054', 'step': 7270, 'epoch': 3} {'type': 'loss', 'content': 0.000257999578025192, 'timestamp': '2025-09-30 23:12:37.553248', 'step': 7271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 1600009780544.0}, 'timestamp': '2025-09-30 23:12:37.642031', 'step': 7271, 'epoch': 3} {'type': 'loss', 'content': 0.02151213027536869, 'timestamp': '2025-09-30 23:12:37.663135', 'step': 7272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.741312', 'step': 7272, 'epoch': 3} {'type': 'loss', 'content': 0.0023527692537754774, 'timestamp': '2025-09-30 23:12:37.756597', 'step': 7273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.846729', 'step': 7273, 'epoch': 3} {'type': 'loss', 'content': 5.7013716286746785e-05, 'timestamp': '2025-09-30 23:12:37.861044', 'step': 7274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:37.949980', 'step': 7274, 'epoch': 3} {'type': 'loss', 'content': 0.004345959518104792, 'timestamp': '2025-09-30 23:12:37.960236', 'step': 7275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.038515', 'step': 7275, 'epoch': 3} {'type': 'loss', 'content': 0.00014482697588391602, 'timestamp': '2025-09-30 23:12:38.053251', 'step': 7276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 2240013665728.0}, 'timestamp': '2025-09-30 23:12:38.132025', 'step': 7276, 'epoch': 3} {'type': 'loss', 'content': 0.027703071013092995, 'timestamp': '2025-09-30 23:12:38.135894', 'step': 7277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.207207', 'step': 7277, 'epoch': 3} {'type': 'loss', 'content': 0.000636594370007515, 'timestamp': '2025-09-30 23:12:38.217990', 'step': 7278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.305657', 'step': 7278, 'epoch': 3} {'type': 'loss', 'content': 0.00018945163174066693, 'timestamp': '2025-09-30 23:12:38.318576', 'step': 7279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.423044', 'step': 7279, 'epoch': 3} {'type': 'loss', 'content': 0.00010347172792535275, 'timestamp': '2025-09-30 23:12:38.445747', 'step': 7280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.557972', 'step': 7280, 'epoch': 3} {'type': 'loss', 'content': 0.0008551345090381801, 'timestamp': '2025-09-30 23:12:38.577358', 'step': 7281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.651753', 'step': 7281, 'epoch': 3} {'type': 'loss', 'content': 0.03453756123781204, 'timestamp': '2025-09-30 23:12:38.667722', 'step': 7282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.757320', 'step': 7282, 'epoch': 3} {'type': 'loss', 'content': 0.07845765352249146, 'timestamp': '2025-09-30 23:12:38.768705', 'step': 7283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.848281', 'step': 7283, 'epoch': 3} {'type': 'loss', 'content': 0.00011493413330754265, 'timestamp': '2025-09-30 23:12:38.855985', 'step': 7284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:38.937086', 'step': 7284, 'epoch': 3} {'type': 'loss', 'content': 7.083938544383273e-05, 'timestamp': '2025-09-30 23:12:38.950655', 'step': 7285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.019481', 'step': 7285, 'epoch': 3} {'type': 'loss', 'content': 0.0032546245492994785, 'timestamp': '2025-09-30 23:12:39.023226', 'step': 7286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.083035', 'step': 7286, 'epoch': 3} {'type': 'loss', 'content': 5.021273318561725e-05, 'timestamp': '2025-09-30 23:12:39.090395', 'step': 7287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.162570', 'step': 7287, 'epoch': 3} {'type': 'loss', 'content': 0.00034938936005346477, 'timestamp': '2025-09-30 23:12:39.174374', 'step': 7288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.235439', 'step': 7288, 'epoch': 3} {'type': 'loss', 'content': 2.8751222998835146e-05, 'timestamp': '2025-09-30 23:12:39.241141', 'step': 7289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.314073', 'step': 7289, 'epoch': 3} {'type': 'loss', 'content': 0.0004744942707475275, 'timestamp': '2025-09-30 23:12:39.333492', 'step': 7290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.413380', 'step': 7290, 'epoch': 3} {'type': 'loss', 'content': 0.0023032445460557938, 'timestamp': '2025-09-30 23:12:39.432246', 'step': 7291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.557168', 'step': 7291, 'epoch': 3} {'type': 'loss', 'content': 0.07455889135599136, 'timestamp': '2025-09-30 23:12:39.580777', 'step': 7292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.655592', 'step': 7292, 'epoch': 3} {'type': 'loss', 'content': 0.0003553745336830616, 'timestamp': '2025-09-30 23:12:39.678805', 'step': 7293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.787576', 'step': 7293, 'epoch': 3} {'type': 'loss', 'content': 0.00019521545618772507, 'timestamp': '2025-09-30 23:12:39.808479', 'step': 7294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.870711', 'step': 7294, 'epoch': 3} {'type': 'loss', 'content': 0.009636180475354195, 'timestamp': '2025-09-30 23:12:39.875114', 'step': 7295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:39.955141', 'step': 7295, 'epoch': 3} {'type': 'loss', 'content': 0.0003101364418398589, 'timestamp': '2025-09-30 23:12:39.972611', 'step': 7296, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:12:45.782273', 'step': 7296, 'epoch': 3} {'type': 'pplx', 'content': 7134626.544267386, 'timestamp': '2025-09-30 23:12:45.795013', 'step': 7296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:45.861924', 'step': 7296, 'epoch': 3} {'type': 'loss', 'content': 0.0006692245951853693, 'timestamp': '2025-09-30 23:12:45.869744', 'step': 7297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:45.929835', 'step': 7297, 'epoch': 3} {'type': 'loss', 'content': 0.006430033594369888, 'timestamp': '2025-09-30 23:12:45.938010', 'step': 7298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.018751', 'step': 7298, 'epoch': 3} {'type': 'loss', 'content': 0.0014472175389528275, 'timestamp': '2025-09-30 23:12:46.023134', 'step': 7299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.092058', 'step': 7299, 'epoch': 3} {'type': 'loss', 'content': 0.013864925131201744, 'timestamp': '2025-09-30 23:12:46.109013', 'step': 7300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.174620', 'step': 7300, 'epoch': 3} {'type': 'loss', 'content': 0.0005732430145144463, 'timestamp': '2025-09-30 23:12:46.185029', 'step': 7301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.249440', 'step': 7301, 'epoch': 3} {'type': 'loss', 'content': 0.0005887472070753574, 'timestamp': '2025-09-30 23:12:46.261338', 'step': 7302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.335637', 'step': 7302, 'epoch': 3} {'type': 'loss', 'content': 0.01990668848156929, 'timestamp': '2025-09-30 23:12:46.339430', 'step': 7303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.409677', 'step': 7303, 'epoch': 3} {'type': 'loss', 'content': 0.0061913589015603065, 'timestamp': '2025-09-30 23:12:46.421000', 'step': 7304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.497335', 'step': 7304, 'epoch': 3} {'type': 'loss', 'content': 0.00025634714984335005, 'timestamp': '2025-09-30 23:12:46.507488', 'step': 7305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.583647', 'step': 7305, 'epoch': 3} {'type': 'loss', 'content': 7.030024426057935e-05, 'timestamp': '2025-09-30 23:12:46.591829', 'step': 7306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 1920011723136.0}, 'timestamp': '2025-09-30 23:12:46.664310', 'step': 7306, 'epoch': 3} {'type': 'loss', 'content': 0.00029537148657254875, 'timestamp': '2025-09-30 23:12:46.675270', 'step': 7307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 80], 'flops': 400002507344.0}, 'timestamp': '2025-09-30 23:12:46.752753', 'step': 7307, 'epoch': 3} {'type': 'loss', 'content': 7.169425225583836e-05, 'timestamp': '2025-09-30 23:12:46.760168', 'step': 7308, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 958148730240}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1596914505344}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1916297392896}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1277531617792}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1596914505344}], 'timestamp': '2025-09-30 23:12:52.600017', 'step': 7308, 'epoch': 3} {'type': 'pplx', 'content': 6807366.398394979, 'timestamp': '2025-09-30 23:12:52.603735', 'step': 7308, 'epoch': 3} {'type': 'best_pplx', 'content': 4430028.1106051, 'timestamp': '2025-09-30 23:12:52.621279', 'step': 7308, 'epoch': 3} {'type': 'best_step', 'content': 152, 'timestamp': '2025-09-30 23:12:52.633276', 'step': 7308, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 9900870031129600, 'timestamp': '2025-09-30 23:12:52.641687', 'step': 7308, 'epoch': 3} {'type': 'total_train_flops', 'content': 1.4088326018008176e+16, 'timestamp': '2025-09-30 23:12:52.651841', 'step': 7308, 'epoch': 3}