{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:26:13.524298', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 68890406.29865518, 'timestamp': '2025-09-10 02:26:13.526235', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.592622', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 0.784187912940979, 'timestamp': '2025-09-10 02:26:13.594173', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.623535', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 0.9402614235877991, 'timestamp': '2025-09-10 02:26:13.624815', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.652804', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 0.9695280194282532, 'timestamp': '2025-09-10 02:26:13.654601', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.682671', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.8488698601722717, 'timestamp': '2025-09-10 02:26:13.772469', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.802136', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.0741942897439003, 'timestamp': '2025-09-10 02:26:13.803521', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.832478', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.08400722593069077, 'timestamp': '2025-09-10 02:26:13.834006', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.862051', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.08074257522821426, 'timestamp': '2025-09-10 02:26:13.863266', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:13.891901', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.09530693292617798, 'timestamp': '2025-09-10 02:26:13.915100', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:13.965999', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.05420571565628052, 'timestamp': '2025-09-10 02:26:13.967577', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.014487', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.04895346984267235, 'timestamp': '2025-09-10 02:26:14.015879', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.044805', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.05694868788123131, 'timestamp': '2025-09-10 02:26:14.046849', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.075117', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.05808507651090622, 'timestamp': '2025-09-10 02:26:14.098051', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:14.126896', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.06551863998174667, 'timestamp': '2025-09-10 02:26:14.128469', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.156688', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.054176900535821915, 'timestamp': '2025-09-10 02:26:14.158099', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.186202', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.0339355394244194, 'timestamp': '2025-09-10 02:26:14.187762', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.215858', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.041469670832157135, 'timestamp': '2025-09-10 02:26:14.238658', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.267079', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.06128307431936264, 'timestamp': '2025-09-10 02:26:14.268524', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:14.297136', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.03547512739896774, 'timestamp': '2025-09-10 02:26:14.298573', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.327130', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.08370207995176315, 'timestamp': '2025-09-10 02:26:14.328766', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.356877', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.050863347947597504, 'timestamp': '2025-09-10 02:26:14.379643', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.412795', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.0537407211959362, 'timestamp': '2025-09-10 02:26:14.414235', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.442911', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.05583806708455086, 'timestamp': '2025-09-10 02:26:14.444284', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.473247', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.04571468010544777, 'timestamp': '2025-09-10 02:26:14.475403', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.504448', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.041455768048763275, 'timestamp': '2025-09-10 02:26:14.527447', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.555519', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.04378771409392357, 'timestamp': '2025-09-10 02:26:14.557068', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.585593', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.04339175671339035, 'timestamp': '2025-09-10 02:26:14.587165', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:14.615648', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.04379519447684288, 'timestamp': '2025-09-10 02:26:14.617229', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.645665', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.041180964559316635, 'timestamp': '2025-09-10 02:26:14.668564', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.697113', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.022096460685133934, 'timestamp': '2025-09-10 02:26:14.698549', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.726836', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.053119901567697525, 'timestamp': '2025-09-10 02:26:14.728042', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.756017', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.031400687992572784, 'timestamp': '2025-09-10 02:26:14.757476', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.785833', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.045110367238521576, 'timestamp': '2025-09-10 02:26:14.808980', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.837430', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.054868753999471664, 'timestamp': '2025-09-10 02:26:14.839041', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:14.867358', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.0636897161602974, 'timestamp': '2025-09-10 02:26:14.868799', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.897213', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.04060392826795578, 'timestamp': '2025-09-10 02:26:14.898677', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.927090', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.04056873545050621, 'timestamp': '2025-09-10 02:26:14.949950', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:14.979350', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.03306897357106209, 'timestamp': '2025-09-10 02:26:14.980982', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.010152', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.037819407880306244, 'timestamp': '2025-09-10 02:26:15.011768', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.040529', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.02081454172730446, 'timestamp': '2025-09-10 02:26:15.042188', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.071219', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.03982502594590187, 'timestamp': '2025-09-10 02:26:15.094237', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.122803', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.04330352321267128, 'timestamp': '2025-09-10 02:26:15.124383', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:15.153283', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.03618653863668442, 'timestamp': '2025-09-10 02:26:15.154850', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.183447', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.038391124457120895, 'timestamp': '2025-09-10 02:26:15.185005', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.213025', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.08018808811903, 'timestamp': '2025-09-10 02:26:15.236052', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.264797', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.02720540761947632, 'timestamp': '2025-09-10 02:26:15.266308', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.294802', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.05708106607198715, 'timestamp': '2025-09-10 02:26:15.296254', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.324914', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.03747594356536865, 'timestamp': '2025-09-10 02:26:15.326435', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.355563', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.0383264534175396, 'timestamp': '2025-09-10 02:26:15.378617', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.407519', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.052838169038295746, 'timestamp': '2025-09-10 02:26:15.408927', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.437592', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.04840589687228203, 'timestamp': '2025-09-10 02:26:15.439012', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:15.467498', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.08914273232221603, 'timestamp': '2025-09-10 02:26:15.468930', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:15.503794', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.02010265178978443, 'timestamp': '2025-09-10 02:26:15.526986', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.557385', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.09015186876058578, 'timestamp': '2025-09-10 02:26:15.559220', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.587952', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.015328974463045597, 'timestamp': '2025-09-10 02:26:15.589775', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.618275', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.035104621201753616, 'timestamp': '2025-09-10 02:26:15.620102', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:15.648987', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.056207407265901566, 'timestamp': '2025-09-10 02:26:15.672216', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.701206', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.009822872467339039, 'timestamp': '2025-09-10 02:26:15.703009', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.731703', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.04551895335316658, 'timestamp': '2025-09-10 02:26:15.733492', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.762284', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.05960583686828613, 'timestamp': '2025-09-10 02:26:15.764183', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.792963', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.0849723145365715, 'timestamp': '2025-09-10 02:26:15.816148', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.845084', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.01958092674612999, 'timestamp': '2025-09-10 02:26:15.847104', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:15.875888', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.04763122275471687, 'timestamp': '2025-09-10 02:26:15.877522', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.906937', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.05476832017302513, 'timestamp': '2025-09-10 02:26:15.908576', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.936928', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.043183039873838425, 'timestamp': '2025-09-10 02:26:15.959969', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:15.989583', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.06260648369789124, 'timestamp': '2025-09-10 02:26:15.991986', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.020682', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.024250363931059837, 'timestamp': '2025-09-10 02:26:16.022485', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.051140', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.024692872539162636, 'timestamp': '2025-09-10 02:26:16.052889', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.084626', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.07003824412822723, 'timestamp': '2025-09-10 02:26:16.107891', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:16.136698', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.11602479964494705, 'timestamp': '2025-09-10 02:26:16.138083', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.167096', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.0395672507584095, 'timestamp': '2025-09-10 02:26:16.168597', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.197674', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.05587854981422424, 'timestamp': '2025-09-10 02:26:16.199052', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.228575', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.016235610470175743, 'timestamp': '2025-09-10 02:26:16.251865', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.281543', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.04589106887578964, 'timestamp': '2025-09-10 02:26:16.283174', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.313168', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.07501351088285446, 'timestamp': '2025-09-10 02:26:16.314760', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.344629', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.04023617133498192, 'timestamp': '2025-09-10 02:26:16.346140', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.375324', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.043087732046842575, 'timestamp': '2025-09-10 02:26:16.398432', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.427916', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.07063813507556915, 'timestamp': '2025-09-10 02:26:16.429310', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.458330', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.07721427828073502, 'timestamp': '2025-09-10 02:26:16.459764', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:16.489500', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.08201445639133453, 'timestamp': '2025-09-10 02:26:16.490919', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.520308', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.037445347756147385, 'timestamp': '2025-09-10 02:26:16.543592', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:16.572617', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.03383129462599754, 'timestamp': '2025-09-10 02:26:16.574306', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.603888', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.06920628994703293, 'timestamp': '2025-09-10 02:26:16.605804', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.634788', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.057791560888290405, 'timestamp': '2025-09-10 02:26:16.636543', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.665235', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.04435967653989792, 'timestamp': '2025-09-10 02:26:16.688516', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.717461', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.03706847503781319, 'timestamp': '2025-09-10 02:26:16.719282', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.748172', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.031112460419535637, 'timestamp': '2025-09-10 02:26:16.749896', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.778525', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.06309755146503448, 'timestamp': '2025-09-10 02:26:16.780175', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.809312', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.04229046776890755, 'timestamp': '2025-09-10 02:26:16.833521', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.862018', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.02340858057141304, 'timestamp': '2025-09-10 02:26:16.863661', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:16.891912', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.05444183200597763, 'timestamp': '2025-09-10 02:26:16.893599', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:16.921943', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.03168310970067978, 'timestamp': '2025-09-10 02:26:16.923859', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:16.952720', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.0278034545481205, 'timestamp': '2025-09-10 02:26:16.976018', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.005150', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.03337569162249565, 'timestamp': '2025-09-10 02:26:17.006996', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.035422', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.04794316738843918, 'timestamp': '2025-09-10 02:26:17.037023', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.065605', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.04414382576942444, 'timestamp': '2025-09-10 02:26:17.067204', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.095901', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.04380037635564804, 'timestamp': '2025-09-10 02:26:17.119020', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:17.148381', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.04589477926492691, 'timestamp': '2025-09-10 02:26:17.150063', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.178477', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.03385995700955391, 'timestamp': '2025-09-10 02:26:17.180652', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.208976', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.040779177099466324, 'timestamp': '2025-09-10 02:26:17.210594', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:17.238862', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.036285847425460815, 'timestamp': '2025-09-10 02:26:17.266483', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.298194', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.026493841782212257, 'timestamp': '2025-09-10 02:26:17.302159', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.332146', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.02587372623383999, 'timestamp': '2025-09-10 02:26:17.333544', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:17.363048', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.04184553027153015, 'timestamp': '2025-09-10 02:26:17.364496', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.393032', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.04045234993100166, 'timestamp': '2025-09-10 02:26:17.418098', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.446652', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.015993589535355568, 'timestamp': '2025-09-10 02:26:17.448264', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.476805', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.03624787554144859, 'timestamp': '2025-09-10 02:26:17.478193', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.507475', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.03354557603597641, 'timestamp': '2025-09-10 02:26:17.509125', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.536951', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.05234936624765396, 'timestamp': '2025-09-10 02:26:17.559899', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.588316', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.026534216478466988, 'timestamp': '2025-09-10 02:26:17.590009', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.618304', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.05735252425074577, 'timestamp': '2025-09-10 02:26:17.620170', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.649166', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.053705159574747086, 'timestamp': '2025-09-10 02:26:17.652293', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.683236', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.044398847967386246, 'timestamp': '2025-09-10 02:26:17.706556', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.735203', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.03528786823153496, 'timestamp': '2025-09-10 02:26:17.737352', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.766273', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.06282095611095428, 'timestamp': '2025-09-10 02:26:17.768071', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.796683', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.06083959341049194, 'timestamp': '2025-09-10 02:26:17.798374', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.826424', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.04757341742515564, 'timestamp': '2025-09-10 02:26:17.849588', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.878164', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.04990594834089279, 'timestamp': '2025-09-10 02:26:17.879835', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.908676', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.03153015300631523, 'timestamp': '2025-09-10 02:26:17.910502', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:17.939098', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.03902817517518997, 'timestamp': '2025-09-10 02:26:17.940863', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:17.969439', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.03202887997031212, 'timestamp': '2025-09-10 02:26:17.992527', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.021926', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.02192959189414978, 'timestamp': '2025-09-10 02:26:18.023304', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.051518', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.028383001685142517, 'timestamp': '2025-09-10 02:26:18.052841', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.084660', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.059359509497880936, 'timestamp': '2025-09-10 02:26:18.086135', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.115037', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.03712543845176697, 'timestamp': '2025-09-10 02:26:18.139005', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.168086', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.07202344387769699, 'timestamp': '2025-09-10 02:26:18.169297', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.198484', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.019284551963210106, 'timestamp': '2025-09-10 02:26:18.200772', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.229103', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.04529854282736778, 'timestamp': '2025-09-10 02:26:18.231013', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.259682', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.036308206617832184, 'timestamp': '2025-09-10 02:26:18.282605', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.311567', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.06300818175077438, 'timestamp': '2025-09-10 02:26:18.313266', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.341832', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.02641741931438446, 'timestamp': '2025-09-10 02:26:18.343609', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.371963', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.03118242882192135, 'timestamp': '2025-09-10 02:26:18.373788', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.402383', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.026283519342541695, 'timestamp': '2025-09-10 02:26:18.425517', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.459956', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.016796765848994255, 'timestamp': '2025-09-10 02:26:18.461783', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.490694', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.011252478696405888, 'timestamp': '2025-09-10 02:26:18.492227', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.521295', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.028942221775650978, 'timestamp': '2025-09-10 02:26:18.522889', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.552693', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.01871657930314541, 'timestamp': '2025-09-10 02:26:18.576080', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.604924', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.04277872294187546, 'timestamp': '2025-09-10 02:26:18.606623', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.635283', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.01327602844685316, 'timestamp': '2025-09-10 02:26:18.636935', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:18.666162', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.040411483496427536, 'timestamp': '2025-09-10 02:26:18.667696', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.696329', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.012431403622031212, 'timestamp': '2025-09-10 02:26:18.719107', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.748303', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.022387627512216568, 'timestamp': '2025-09-10 02:26:18.750077', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.789074', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.061557136476039886, 'timestamp': '2025-09-10 02:26:18.792868', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.826710', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.023226479068398476, 'timestamp': '2025-09-10 02:26:18.828500', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.857576', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.02590755559504032, 'timestamp': '2025-09-10 02:26:18.881140', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.918310', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.024034647271037102, 'timestamp': '2025-09-10 02:26:18.922067', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:18.971480', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.024593573063611984, 'timestamp': '2025-09-10 02:26:18.974677', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.006397', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.03884469345211983, 'timestamp': '2025-09-10 02:26:19.008010', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.037056', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.0037036570720374584, 'timestamp': '2025-09-10 02:26:19.060018', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.088187', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.013467243872582912, 'timestamp': '2025-09-10 02:26:19.089862', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.118234', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.03569980338215828, 'timestamp': '2025-09-10 02:26:19.119846', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:19.148790', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.05327266454696655, 'timestamp': '2025-09-10 02:26:19.150392', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:19.178971', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.02637994848191738, 'timestamp': '2025-09-10 02:26:19.202054', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:26:21.067095', 'step': 152, 'epoch': 1} {'type': 'pplx', 'content': 2095964.7926953253, 'timestamp': '2025-09-10 02:26:21.068909', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.097345', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.07849128544330597, 'timestamp': '2025-09-10 02:26:21.098978', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.128487', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.03526788949966431, 'timestamp': '2025-09-10 02:26:21.130140', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:21.159156', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.04433086887001991, 'timestamp': '2025-09-10 02:26:21.160994', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.189477', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.07645846158266068, 'timestamp': '2025-09-10 02:26:21.212664', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:21.241691', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.003905185032635927, 'timestamp': '2025-09-10 02:26:21.243356', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:21.271923', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.006804810371249914, 'timestamp': '2025-09-10 02:26:21.273506', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.302001', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.019923415035009384, 'timestamp': '2025-09-10 02:26:21.303564', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.332038', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.05792221054434776, 'timestamp': '2025-09-10 02:26:21.355247', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.384202', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.07608406245708466, 'timestamp': '2025-09-10 02:26:21.385747', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.414044', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.02290247194468975, 'timestamp': '2025-09-10 02:26:21.415414', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.444229', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.030202900990843773, 'timestamp': '2025-09-10 02:26:21.445974', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.474480', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.008085962384939194, 'timestamp': '2025-09-10 02:26:21.497594', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:21.526912', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.029565736651420593, 'timestamp': '2025-09-10 02:26:21.528474', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:21.557647', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.07050912082195282, 'timestamp': '2025-09-10 02:26:21.559200', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.587896', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.029368679970502853, 'timestamp': '2025-09-10 02:26:21.589611', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:21.617896', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.03375962749123573, 'timestamp': '2025-09-10 02:26:21.641349', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:21.670062', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.051859911531209946, 'timestamp': '2025-09-10 02:26:21.671888', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.700528', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.03229187801480293, 'timestamp': '2025-09-10 02:26:21.702132', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.730879', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.037152018398046494, 'timestamp': '2025-09-10 02:26:21.732873', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:21.762359', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.044093675911426544, 'timestamp': '2025-09-10 02:26:21.785669', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.814297', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.04556509107351303, 'timestamp': '2025-09-10 02:26:21.816145', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.844590', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.039365023374557495, 'timestamp': '2025-09-10 02:26:21.846394', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.875229', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.05335281416773796, 'timestamp': '2025-09-10 02:26:21.876869', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.905172', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.03890601545572281, 'timestamp': '2025-09-10 02:26:21.928384', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.957519', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.03906140476465225, 'timestamp': '2025-09-10 02:26:21.959111', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:21.988085', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.019950097426772118, 'timestamp': '2025-09-10 02:26:21.990672', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.019273', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.004369103349745274, 'timestamp': '2025-09-10 02:26:22.021059', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.049632', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.06852971017360687, 'timestamp': '2025-09-10 02:26:22.073099', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:22.102650', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.005697351414710283, 'timestamp': '2025-09-10 02:26:22.104617', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.133331', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.009286266751587391, 'timestamp': '2025-09-10 02:26:22.134685', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.162682', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.052384261041879654, 'timestamp': '2025-09-10 02:26:22.164082', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:22.192832', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.03431450203061104, 'timestamp': '2025-09-10 02:26:22.215938', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.245652', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.05092969536781311, 'timestamp': '2025-09-10 02:26:22.246982', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.275224', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.022450143471360207, 'timestamp': '2025-09-10 02:26:22.276665', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.305883', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.022179920226335526, 'timestamp': '2025-09-10 02:26:22.307836', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.336793', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.0721798688173294, 'timestamp': '2025-09-10 02:26:22.360332', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.389644', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.032928816974163055, 'timestamp': '2025-09-10 02:26:22.391307', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.420074', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.023632245138287544, 'timestamp': '2025-09-10 02:26:22.421956', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.450670', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.05820164084434509, 'timestamp': '2025-09-10 02:26:22.452548', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.481380', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.05574128404259682, 'timestamp': '2025-09-10 02:26:22.504754', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.533254', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.0478353314101696, 'timestamp': '2025-09-10 02:26:22.534978', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.563463', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.030672820284962654, 'timestamp': '2025-09-10 02:26:22.565396', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.594615', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.032667286694049835, 'timestamp': '2025-09-10 02:26:22.596297', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.624958', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.03710975870490074, 'timestamp': '2025-09-10 02:26:22.648364', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:22.677810', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.03341403231024742, 'timestamp': '2025-09-10 02:26:22.679287', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.708573', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.05037161707878113, 'timestamp': '2025-09-10 02:26:22.710060', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.738359', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.07397724688053131, 'timestamp': '2025-09-10 02:26:22.739819', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.767899', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.059347983449697495, 'timestamp': '2025-09-10 02:26:22.791114', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.819227', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.014053313061594963, 'timestamp': '2025-09-10 02:26:22.820644', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:22.848434', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.02120036818087101, 'timestamp': '2025-09-10 02:26:22.849833', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.878051', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.03766484931111336, 'timestamp': '2025-09-10 02:26:22.879419', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.908173', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.04409381002187729, 'timestamp': '2025-09-10 02:26:22.931082', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.959405', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.009215420112013817, 'timestamp': '2025-09-10 02:26:22.960760', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:22.988966', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.052273061126470566, 'timestamp': '2025-09-10 02:26:22.990152', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.018590', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.0432240329682827, 'timestamp': '2025-09-10 02:26:23.020104', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.048231', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.034780390560626984, 'timestamp': '2025-09-10 02:26:23.071521', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.100987', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.04283388704061508, 'timestamp': '2025-09-10 02:26:23.102834', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.131643', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.022248506546020508, 'timestamp': '2025-09-10 02:26:23.133269', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.162059', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.05728680640459061, 'timestamp': '2025-09-10 02:26:23.163686', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.192244', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.027303336188197136, 'timestamp': '2025-09-10 02:26:23.215400', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:23.244106', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.04928375035524368, 'timestamp': '2025-09-10 02:26:23.245604', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.274221', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.05382887274026871, 'timestamp': '2025-09-10 02:26:23.275552', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:23.303867', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.05966060236096382, 'timestamp': '2025-09-10 02:26:23.305555', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.333772', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.02548491768538952, 'timestamp': '2025-09-10 02:26:23.357028', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.385609', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.023930877447128296, 'timestamp': '2025-09-10 02:26:23.387413', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.415962', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.04809613898396492, 'timestamp': '2025-09-10 02:26:23.417366', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.446047', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.05492384359240532, 'timestamp': '2025-09-10 02:26:23.447487', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.475862', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.0793657973408699, 'timestamp': '2025-09-10 02:26:23.499124', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.527543', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.02191893383860588, 'timestamp': '2025-09-10 02:26:23.529118', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.557487', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.04995134845376015, 'timestamp': '2025-09-10 02:26:23.559136', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.587667', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.05806097760796547, 'timestamp': '2025-09-10 02:26:23.589189', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.617464', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.0502280592918396, 'timestamp': '2025-09-10 02:26:23.640899', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:23.669794', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.05602055415511131, 'timestamp': '2025-09-10 02:26:23.671377', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.699700', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.02692429907619953, 'timestamp': '2025-09-10 02:26:23.701562', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:23.729516', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.07570789754390717, 'timestamp': '2025-09-10 02:26:23.730929', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.758721', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.03995998576283455, 'timestamp': '2025-09-10 02:26:23.782155', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:23.810338', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.023368533700704575, 'timestamp': '2025-09-10 02:26:23.812031', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.840340', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.05306321382522583, 'timestamp': '2025-09-10 02:26:23.841766', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.869739', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.017180833965539932, 'timestamp': '2025-09-10 02:26:23.871459', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:23.900307', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.02015542984008789, 'timestamp': '2025-09-10 02:26:23.923502', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.952647', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.043472178280353546, 'timestamp': '2025-09-10 02:26:23.954655', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:23.982808', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.042272455990314484, 'timestamp': '2025-09-10 02:26:23.984249', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.012630', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.0589950866997242, 'timestamp': '2025-09-10 02:26:24.014333', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.042538', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.012602618895471096, 'timestamp': '2025-09-10 02:26:24.065811', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.094518', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.02325502410531044, 'timestamp': '2025-09-10 02:26:24.096016', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.124289', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.019194353371858597, 'timestamp': '2025-09-10 02:26:24.126113', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.154442', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.03466707095503807, 'timestamp': '2025-09-10 02:26:24.156128', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.184472', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.027127530425786972, 'timestamp': '2025-09-10 02:26:24.207676', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.236747', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.010394366458058357, 'timestamp': '2025-09-10 02:26:24.239310', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.268133', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.05274328589439392, 'timestamp': '2025-09-10 02:26:24.269714', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.298342', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.03417865186929703, 'timestamp': '2025-09-10 02:26:24.300067', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.328345', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.022119751200079918, 'timestamp': '2025-09-10 02:26:24.351448', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.379231', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.05870979651808739, 'timestamp': '2025-09-10 02:26:24.380881', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.408380', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.042090386152267456, 'timestamp': '2025-09-10 02:26:24.409965', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.437764', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.04785066470503807, 'timestamp': '2025-09-10 02:26:24.439297', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:24.467589', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.03291523829102516, 'timestamp': '2025-09-10 02:26:24.490680', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.519144', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.04091138020157814, 'timestamp': '2025-09-10 02:26:24.520799', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.548822', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.03316140174865723, 'timestamp': '2025-09-10 02:26:24.550258', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:24.578359', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.034854013472795486, 'timestamp': '2025-09-10 02:26:24.579803', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.607541', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.06467428058385849, 'timestamp': '2025-09-10 02:26:24.630608', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.661119', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.0717066153883934, 'timestamp': '2025-09-10 02:26:24.662639', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.690575', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.03744388371706009, 'timestamp': '2025-09-10 02:26:24.692103', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.720162', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.034172169864177704, 'timestamp': '2025-09-10 02:26:24.721671', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.749322', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.04225926846265793, 'timestamp': '2025-09-10 02:26:24.772543', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:24.800634', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.040328167378902435, 'timestamp': '2025-09-10 02:26:24.802285', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.830388', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.038004156202077866, 'timestamp': '2025-09-10 02:26:24.831991', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.859825', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.022771785035729408, 'timestamp': '2025-09-10 02:26:24.861514', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.889773', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.03296079486608505, 'timestamp': '2025-09-10 02:26:24.912867', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.941504', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.04581866413354874, 'timestamp': '2025-09-10 02:26:24.942995', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:24.970835', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.024789679795503616, 'timestamp': '2025-09-10 02:26:24.972438', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.000486', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.03151870146393776, 'timestamp': '2025-09-10 02:26:25.002080', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.029876', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.04139602929353714, 'timestamp': '2025-09-10 02:26:25.053004', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.081238', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.03249174356460571, 'timestamp': '2025-09-10 02:26:25.083009', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.111405', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.00858510285615921, 'timestamp': '2025-09-10 02:26:25.113002', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.141828', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.013428745791316032, 'timestamp': '2025-09-10 02:26:25.143369', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.171649', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.02350384183228016, 'timestamp': '2025-09-10 02:26:25.194926', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.222959', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.0218183733522892, 'timestamp': '2025-09-10 02:26:25.224586', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.252694', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.06323713809251785, 'timestamp': '2025-09-10 02:26:25.254091', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.282153', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.014637206681072712, 'timestamp': '2025-09-10 02:26:25.283831', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.312251', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.04055526852607727, 'timestamp': '2025-09-10 02:26:25.335143', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.363879', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.01910637505352497, 'timestamp': '2025-09-10 02:26:25.364992', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.392997', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.04856686666607857, 'timestamp': '2025-09-10 02:26:25.394622', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.422567', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.04855852574110031, 'timestamp': '2025-09-10 02:26:25.423932', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:25.451794', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.014289570041000843, 'timestamp': '2025-09-10 02:26:25.474936', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:25.502817', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.10124220699071884, 'timestamp': '2025-09-10 02:26:25.504467', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.532587', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.027577316388487816, 'timestamp': '2025-09-10 02:26:25.534069', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.562061', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.09541350603103638, 'timestamp': '2025-09-10 02:26:25.563767', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.591997', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.04130064696073532, 'timestamp': '2025-09-10 02:26:25.614892', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.643227', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.024617092683911324, 'timestamp': '2025-09-10 02:26:25.645047', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.673124', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.04850497096776962, 'timestamp': '2025-09-10 02:26:25.674963', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.703218', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.02256542444229126, 'timestamp': '2025-09-10 02:26:25.705824', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.735526', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.03141311928629875, 'timestamp': '2025-09-10 02:26:25.758424', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:25.786285', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.02413778007030487, 'timestamp': '2025-09-10 02:26:25.787884', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.815599', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.04970148578286171, 'timestamp': '2025-09-10 02:26:25.817228', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.845182', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.0072316196747124195, 'timestamp': '2025-09-10 02:26:25.846849', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.874843', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.036510735750198364, 'timestamp': '2025-09-10 02:26:25.897636', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.925740', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.03437785059213638, 'timestamp': '2025-09-10 02:26:25.927246', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.955120', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.04750484228134155, 'timestamp': '2025-09-10 02:26:25.956598', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:25.984334', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.04326671361923218, 'timestamp': '2025-09-10 02:26:25.985741', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.013322', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.08769829571247101, 'timestamp': '2025-09-10 02:26:26.036309', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.064161', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.06923764199018478, 'timestamp': '2025-09-10 02:26:26.065701', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.093346', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.07286082208156586, 'timestamp': '2025-09-10 02:26:26.094899', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:26.123423', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.05491286516189575, 'timestamp': '2025-09-10 02:26:26.124903', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.152691', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.04864270240068436, 'timestamp': '2025-09-10 02:26:26.176021', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.204020', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.058525215834379196, 'timestamp': '2025-09-10 02:26:26.205527', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.233743', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.0449678897857666, 'timestamp': '2025-09-10 02:26:26.235093', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.262786', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.038681477308273315, 'timestamp': '2025-09-10 02:26:26.264395', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.292636', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.03218863531947136, 'timestamp': '2025-09-10 02:26:26.315507', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.343609', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.027742283418774605, 'timestamp': '2025-09-10 02:26:26.344935', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.372619', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.05454636365175247, 'timestamp': '2025-09-10 02:26:26.374330', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.402844', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.03219006583094597, 'timestamp': '2025-09-10 02:26:26.404301', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:26.432399', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.022633623331785202, 'timestamp': '2025-09-10 02:26:26.455528', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:26:28.287099', 'step': 304, 'epoch': 1} {'type': 'pplx', 'content': 2404055.911940932, 'timestamp': '2025-09-10 02:26:28.288565', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.315301', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.06278765946626663, 'timestamp': '2025-09-10 02:26:28.316596', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.344481', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.05780908837914467, 'timestamp': '2025-09-10 02:26:28.346105', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.373737', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.0264920461922884, 'timestamp': '2025-09-10 02:26:28.375032', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.402551', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.0319293811917305, 'timestamp': '2025-09-10 02:26:28.425268', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.452946', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.03297847881913185, 'timestamp': '2025-09-10 02:26:28.454275', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.481988', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.025076931342482567, 'timestamp': '2025-09-10 02:26:28.483306', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.511329', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.044106367975473404, 'timestamp': '2025-09-10 02:26:28.512730', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.540373', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.01750769279897213, 'timestamp': '2025-09-10 02:26:28.563519', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.591282', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.05220300331711769, 'timestamp': '2025-09-10 02:26:28.592883', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.620596', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.039120737463235855, 'timestamp': '2025-09-10 02:26:28.622019', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.649734', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.0120754549279809, 'timestamp': '2025-09-10 02:26:28.651290', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.679231', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.018747910857200623, 'timestamp': '2025-09-10 02:26:28.702267', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:28.730022', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.026074785739183426, 'timestamp': '2025-09-10 02:26:28.731214', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.759159', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.043595410883426666, 'timestamp': '2025-09-10 02:26:28.760489', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.788423', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.026542428880929947, 'timestamp': '2025-09-10 02:26:28.790054', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.818275', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.027812568470835686, 'timestamp': '2025-09-10 02:26:28.841584', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.869599', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.04263043776154518, 'timestamp': '2025-09-10 02:26:28.871266', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:28.899649', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.05384966358542442, 'timestamp': '2025-09-10 02:26:28.901071', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:28.928534', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.02516597881913185, 'timestamp': '2025-09-10 02:26:28.930222', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:28.958245', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.026139909401535988, 'timestamp': '2025-09-10 02:26:28.981131', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:29.009112', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.06678090989589691, 'timestamp': '2025-09-10 02:26:29.010501', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.038574', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.04635394737124443, 'timestamp': '2025-09-10 02:26:29.040095', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.067779', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.02118229679763317, 'timestamp': '2025-09-10 02:26:29.069291', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.097315', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.02434566244482994, 'timestamp': '2025-09-10 02:26:29.120209', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.147958', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.05468794330954552, 'timestamp': '2025-09-10 02:26:29.149141', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.176857', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.036136846989393234, 'timestamp': '2025-09-10 02:26:29.178020', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.205772', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.0247384961694479, 'timestamp': '2025-09-10 02:26:29.207121', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.234765', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.06029272824525833, 'timestamp': '2025-09-10 02:26:29.257530', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.285051', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.023629697039723396, 'timestamp': '2025-09-10 02:26:29.286641', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.315728', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.04386547580361366, 'timestamp': '2025-09-10 02:26:29.317136', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.344895', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.03868928551673889, 'timestamp': '2025-09-10 02:26:29.346552', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.374354', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.049940530210733414, 'timestamp': '2025-09-10 02:26:29.397501', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.425829', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.0499514676630497, 'timestamp': '2025-09-10 02:26:29.427480', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.456414', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.037628378719091415, 'timestamp': '2025-09-10 02:26:29.457910', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.486295', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.05219903960824013, 'timestamp': '2025-09-10 02:26:29.487984', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:29.515915', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.021979600191116333, 'timestamp': '2025-09-10 02:26:29.538793', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.567219', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.011105814017355442, 'timestamp': '2025-09-10 02:26:29.568345', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.596107', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.031031189486384392, 'timestamp': '2025-09-10 02:26:29.597302', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.625049', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.04790028557181358, 'timestamp': '2025-09-10 02:26:29.626178', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.654164', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.023874282836914062, 'timestamp': '2025-09-10 02:26:29.676876', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:29.705452', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.03260252997279167, 'timestamp': '2025-09-10 02:26:29.706882', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.735009', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.041003767400979996, 'timestamp': '2025-09-10 02:26:29.736889', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.765007', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.036079805344343185, 'timestamp': '2025-09-10 02:26:29.766550', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.794840', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.0438341461122036, 'timestamp': '2025-09-10 02:26:29.818061', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.846857', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.011688505299389362, 'timestamp': '2025-09-10 02:26:29.848472', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.877246', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.055797092616558075, 'timestamp': '2025-09-10 02:26:29.878766', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.906809', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.05401964858174324, 'timestamp': '2025-09-10 02:26:29.908331', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.936268', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.03948580473661423, 'timestamp': '2025-09-10 02:26:29.959372', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:29.987192', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.046306490898132324, 'timestamp': '2025-09-10 02:26:29.988639', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.016659', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.045786309987306595, 'timestamp': '2025-09-10 02:26:30.018133', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:30.046106', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.03784593939781189, 'timestamp': '2025-09-10 02:26:30.047503', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.075909', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.055302880704402924, 'timestamp': '2025-09-10 02:26:30.098942', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.127583', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.04041001945734024, 'timestamp': '2025-09-10 02:26:30.129300', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.157581', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.028578907251358032, 'timestamp': '2025-09-10 02:26:30.159428', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.187921', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.03948519378900528, 'timestamp': '2025-09-10 02:26:30.189305', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.217249', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.03630781173706055, 'timestamp': '2025-09-10 02:26:30.240087', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.268075', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.02764829434454441, 'timestamp': '2025-09-10 02:26:30.269573', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.297606', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.04786473512649536, 'timestamp': '2025-09-10 02:26:30.299181', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.327525', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.04653376340866089, 'timestamp': '2025-09-10 02:26:30.328795', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.356643', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.05551743507385254, 'timestamp': '2025-09-10 02:26:30.379504', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.407093', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.030665677040815353, 'timestamp': '2025-09-10 02:26:30.408375', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.436064', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.04767214134335518, 'timestamp': '2025-09-10 02:26:30.437275', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.464992', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.03540532663464546, 'timestamp': '2025-09-10 02:26:30.467534', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.495618', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.015588260255753994, 'timestamp': '2025-09-10 02:26:30.518804', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:30.547444', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.024345332756638527, 'timestamp': '2025-09-10 02:26:30.549045', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.577048', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.03131551668047905, 'timestamp': '2025-09-10 02:26:30.578875', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.607443', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.03911859542131424, 'timestamp': '2025-09-10 02:26:30.608823', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.636561', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.02644316479563713, 'timestamp': '2025-09-10 02:26:30.659794', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.688188', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.008261355571448803, 'timestamp': '2025-09-10 02:26:30.689777', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.718062', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.014204383827745914, 'timestamp': '2025-09-10 02:26:30.719783', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.747651', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.019057517871260643, 'timestamp': '2025-09-10 02:26:30.749145', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.777136', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.03331819921731949, 'timestamp': '2025-09-10 02:26:30.799951', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.827916', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.01622638665139675, 'timestamp': '2025-09-10 02:26:30.829426', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.857424', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.012481695041060448, 'timestamp': '2025-09-10 02:26:30.859089', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.887322', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.04068734496831894, 'timestamp': '2025-09-10 02:26:30.889238', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.917607', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.019415130838751793, 'timestamp': '2025-09-10 02:26:30.940485', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.968986', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.01683415099978447, 'timestamp': '2025-09-10 02:26:30.970732', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:30.998774', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.02263670042157173, 'timestamp': '2025-09-10 02:26:31.000342', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.028676', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.03133777156472206, 'timestamp': '2025-09-10 02:26:31.030254', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.058664', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.026273494586348534, 'timestamp': '2025-09-10 02:26:31.081769', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:31.110375', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.05546477437019348, 'timestamp': '2025-09-10 02:26:31.111806', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.139794', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.02453637681901455, 'timestamp': '2025-09-10 02:26:31.141375', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.169903', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.07124367356300354, 'timestamp': '2025-09-10 02:26:31.171271', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.199266', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.04478440433740616, 'timestamp': '2025-09-10 02:26:31.221897', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.249920', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.008734814822673798, 'timestamp': '2025-09-10 02:26:31.251377', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.279567', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.045698363333940506, 'timestamp': '2025-09-10 02:26:31.281077', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.308974', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.09974842518568039, 'timestamp': '2025-09-10 02:26:31.310450', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.338890', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.01415980700403452, 'timestamp': '2025-09-10 02:26:31.362052', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.391100', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.024075329303741455, 'timestamp': '2025-09-10 02:26:31.393084', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:31.421457', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.008073990233242512, 'timestamp': '2025-09-10 02:26:31.423444', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:31.451887', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.04262285679578781, 'timestamp': '2025-09-10 02:26:31.453910', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.482300', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.02701367810368538, 'timestamp': '2025-09-10 02:26:31.505723', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.535062', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.04329529032111168, 'timestamp': '2025-09-10 02:26:31.537661', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.566347', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.03120199404656887, 'timestamp': '2025-09-10 02:26:31.567536', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.595059', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.0885075181722641, 'timestamp': '2025-09-10 02:26:31.599871', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.627923', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.10204315930604935, 'timestamp': '2025-09-10 02:26:31.650766', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.678237', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.012907378375530243, 'timestamp': '2025-09-10 02:26:31.679474', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.707368', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.023699553683400154, 'timestamp': '2025-09-10 02:26:31.708817', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.736605', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.03839164972305298, 'timestamp': '2025-09-10 02:26:31.738485', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.766635', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.03132530674338341, 'timestamp': '2025-09-10 02:26:31.789876', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:31.818018', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.032234448939561844, 'timestamp': '2025-09-10 02:26:31.819622', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.847580', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.03030545823276043, 'timestamp': '2025-09-10 02:26:31.849443', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.877609', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.0347440131008625, 'timestamp': '2025-09-10 02:26:31.879350', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.907873', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.039909474551677704, 'timestamp': '2025-09-10 02:26:31.930982', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.959061', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.026259543374180794, 'timestamp': '2025-09-10 02:26:31.960569', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:31.988342', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.033669766038656235, 'timestamp': '2025-09-10 02:26:31.990124', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:32.018245', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.04910421371459961, 'timestamp': '2025-09-10 02:26:32.020048', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.048422', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.015350817702710629, 'timestamp': '2025-09-10 02:26:32.075488', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:32.107118', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.036262813955545425, 'timestamp': '2025-09-10 02:26:32.108702', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.137207', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.0357423797249794, 'timestamp': '2025-09-10 02:26:32.138538', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:32.166338', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.03278939053416252, 'timestamp': '2025-09-10 02:26:32.167916', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.196029', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.029303012415766716, 'timestamp': '2025-09-10 02:26:32.218913', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.251495', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.07521359622478485, 'timestamp': '2025-09-10 02:26:32.253062', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.281332', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.02829148806631565, 'timestamp': '2025-09-10 02:26:32.282978', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.311001', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.020268267020583153, 'timestamp': '2025-09-10 02:26:32.312470', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.340566', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.07261353731155396, 'timestamp': '2025-09-10 02:26:32.363735', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:32.392148', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.016099663451313972, 'timestamp': '2025-09-10 02:26:32.397304', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:32.428720', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.05036252364516258, 'timestamp': '2025-09-10 02:26:32.430471', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.459189', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.04243912547826767, 'timestamp': '2025-09-10 02:26:32.461080', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.489353', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.0400727204978466, 'timestamp': '2025-09-10 02:26:32.512441', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.540980', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.06823001056909561, 'timestamp': '2025-09-10 02:26:32.542800', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.571870', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.04684382677078247, 'timestamp': '2025-09-10 02:26:32.573713', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:32.602087', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.0509195439517498, 'timestamp': '2025-09-10 02:26:32.603773', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.632027', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.021338066086173058, 'timestamp': '2025-09-10 02:26:32.655272', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.683575', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.01432217936962843, 'timestamp': '2025-09-10 02:26:32.685303', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.713675', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.05419403314590454, 'timestamp': '2025-09-10 02:26:32.715288', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.743675', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.008366560563445091, 'timestamp': '2025-09-10 02:26:32.745515', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.773818', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.013925564475357533, 'timestamp': '2025-09-10 02:26:32.796975', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.825888', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.0459279790520668, 'timestamp': '2025-09-10 02:26:32.827480', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.856834', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.055198825895786285, 'timestamp': '2025-09-10 02:26:32.859418', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:32.890232', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.03116236999630928, 'timestamp': '2025-09-10 02:26:32.891883', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.920230', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.03788456693291664, 'timestamp': '2025-09-10 02:26:32.943709', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:32.972208', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.046811241656541824, 'timestamp': '2025-09-10 02:26:32.973770', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.001853', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.04154620319604874, 'timestamp': '2025-09-10 02:26:33.003420', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.031448', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.041397593915462494, 'timestamp': '2025-09-10 02:26:33.032992', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.061118', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.019992543384432793, 'timestamp': '2025-09-10 02:26:33.084098', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.115521', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.049645211547613144, 'timestamp': '2025-09-10 02:26:33.116881', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.145136', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.016393227502703667, 'timestamp': '2025-09-10 02:26:33.146775', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.174723', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.05730712413787842, 'timestamp': '2025-09-10 02:26:33.176407', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:33.204681', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.037708453834056854, 'timestamp': '2025-09-10 02:26:33.228018', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.256438', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.055815424770116806, 'timestamp': '2025-09-10 02:26:33.258270', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:33.286694', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.037254512310028076, 'timestamp': '2025-09-10 02:26:33.288254', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.316389', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.02852635085582733, 'timestamp': '2025-09-10 02:26:33.318218', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.346191', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.08622664958238602, 'timestamp': '2025-09-10 02:26:33.369525', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:33.398301', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.03732261806726456, 'timestamp': '2025-09-10 02:26:33.400137', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.428564', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.033844877034425735, 'timestamp': '2025-09-10 02:26:33.430163', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.458256', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.012706798501312733, 'timestamp': '2025-09-10 02:26:33.459888', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.488587', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.049883805215358734, 'timestamp': '2025-09-10 02:26:33.511874', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.540676', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.042035240679979324, 'timestamp': '2025-09-10 02:26:33.542283', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.570559', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.01677936501801014, 'timestamp': '2025-09-10 02:26:33.572086', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:33.600106', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.030457666143774986, 'timestamp': '2025-09-10 02:26:33.601580', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:33.629486', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.07816998660564423, 'timestamp': '2025-09-10 02:26:33.652843', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:26:35.499397', 'step': 456, 'epoch': 1} {'type': 'pplx', 'content': 2717367.289805951, 'timestamp': '2025-09-10 02:26:35.501821', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.529451', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.016826681792736053, 'timestamp': '2025-09-10 02:26:35.531071', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.560166', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.02761121466755867, 'timestamp': '2025-09-10 02:26:35.561734', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:35.589860', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.03611263260245323, 'timestamp': '2025-09-10 02:26:35.591796', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.622110', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.03756078705191612, 'timestamp': '2025-09-10 02:26:35.645806', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.674643', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.02756829559803009, 'timestamp': '2025-09-10 02:26:35.676390', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.704519', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.02360410988330841, 'timestamp': '2025-09-10 02:26:35.707313', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.736060', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.03001396730542183, 'timestamp': '2025-09-10 02:26:35.737576', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.765749', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.024144025519490242, 'timestamp': '2025-09-10 02:26:35.789157', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.818311', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.08317694813013077, 'timestamp': '2025-09-10 02:26:35.820179', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.848294', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.03497004508972168, 'timestamp': '2025-09-10 02:26:35.849997', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.878330', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.0486321821808815, 'timestamp': '2025-09-10 02:26:35.880259', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:35.908964', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.04043663293123245, 'timestamp': '2025-09-10 02:26:35.932102', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.960463', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.026344984769821167, 'timestamp': '2025-09-10 02:26:35.962097', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:35.990298', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.019227301701903343, 'timestamp': '2025-09-10 02:26:35.992162', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.020548', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.04276519641280174, 'timestamp': '2025-09-10 02:26:36.022406', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.050688', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.042632218450307846, 'timestamp': '2025-09-10 02:26:36.074076', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.102274', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.016928521916270256, 'timestamp': '2025-09-10 02:26:36.103506', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.131556', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.0740702748298645, 'timestamp': '2025-09-10 02:26:36.133879', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.161893', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.022200632840394974, 'timestamp': '2025-09-10 02:26:36.163513', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.191631', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.04462885856628418, 'timestamp': '2025-09-10 02:26:36.214713', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.243147', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.02617846056818962, 'timestamp': '2025-09-10 02:26:36.244801', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.272786', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.043365441262722015, 'timestamp': '2025-09-10 02:26:36.274447', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.302677', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.03110249526798725, 'timestamp': '2025-09-10 02:26:36.304054', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:36.331895', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.05477583780884743, 'timestamp': '2025-09-10 02:26:36.354760', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.383314', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.026761312037706375, 'timestamp': '2025-09-10 02:26:36.384919', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.413059', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.059805192053318024, 'timestamp': '2025-09-10 02:26:36.414716', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.442850', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.020811093971133232, 'timestamp': '2025-09-10 02:26:36.444388', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.472480', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.057123102247714996, 'timestamp': '2025-09-10 02:26:36.496184', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.525209', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.060989223420619965, 'timestamp': '2025-09-10 02:26:36.526619', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.554471', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.06589416414499283, 'timestamp': '2025-09-10 02:26:36.556596', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.584714', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.03613222762942314, 'timestamp': '2025-09-10 02:26:36.586385', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.615148', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.01947481371462345, 'timestamp': '2025-09-10 02:26:36.638079', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.666505', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.05823732540011406, 'timestamp': '2025-09-10 02:26:36.668053', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.696430', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.020005151629447937, 'timestamp': '2025-09-10 02:26:36.698267', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.726800', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.044948723167181015, 'timestamp': '2025-09-10 02:26:36.728987', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.757039', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.05590436980128288, 'timestamp': '2025-09-10 02:26:36.780315', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.808308', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.07437480986118317, 'timestamp': '2025-09-10 02:26:36.809808', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.837671', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.057792190462350845, 'timestamp': '2025-09-10 02:26:36.839193', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.866904', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.05404549092054367, 'timestamp': '2025-09-10 02:26:36.868490', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.896378', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.02384149469435215, 'timestamp': '2025-09-10 02:26:36.919406', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:36.947521', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.031597964465618134, 'timestamp': '2025-09-10 02:26:36.949120', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:36.977087', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.038688305765390396, 'timestamp': '2025-09-10 02:26:36.978762', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:37.007028', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.030446495860815048, 'timestamp': '2025-09-10 02:26:37.008691', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:37.036385', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.021534087136387825, 'timestamp': '2025-09-10 02:26:37.059563', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-10 02:26:41.537321', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.568606', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.01570574752986431, 'timestamp': '2025-09-10 02:26:41.570127', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.598659', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.02040550298988819, 'timestamp': '2025-09-10 02:26:41.600305', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.628787', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.0573415644466877, 'timestamp': '2025-09-10 02:26:41.630595', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:41.659119', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.030473779886960983, 'timestamp': '2025-09-10 02:26:41.682585', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:41.711042', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.03402290865778923, 'timestamp': '2025-09-10 02:26:41.712577', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.740446', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.03856948763132095, 'timestamp': '2025-09-10 02:26:41.742043', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.770446', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.03185379132628441, 'timestamp': '2025-09-10 02:26:41.772222', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.800894', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.05316280573606491, 'timestamp': '2025-09-10 02:26:41.823772', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.852227', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.024660510942339897, 'timestamp': '2025-09-10 02:26:41.853425', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:41.881382', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.04275422543287277, 'timestamp': '2025-09-10 02:26:41.883014', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:41.911031', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.04156554862856865, 'timestamp': '2025-09-10 02:26:41.912686', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:41.940694', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.03838549181818962, 'timestamp': '2025-09-10 02:26:41.963968', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:41.992188', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.04556984454393387, 'timestamp': '2025-09-10 02:26:41.993830', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:42.022203', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.04985576868057251, 'timestamp': '2025-09-10 02:26:42.023714', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.051442', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.0277482271194458, 'timestamp': '2025-09-10 02:26:42.053184', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.081291', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.048427700996398926, 'timestamp': '2025-09-10 02:26:42.104531', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.132991', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.007362372241914272, 'timestamp': '2025-09-10 02:26:42.134766', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.163506', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.0382639579474926, 'timestamp': '2025-09-10 02:26:42.165013', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.193298', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.03767085075378418, 'timestamp': '2025-09-10 02:26:42.195534', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.224009', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.03013140894472599, 'timestamp': '2025-09-10 02:26:42.247118', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:42.275353', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.016971739009022713, 'timestamp': '2025-09-10 02:26:42.276865', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.304896', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.02043677680194378, 'timestamp': '2025-09-10 02:26:42.306589', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.335270', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.046038419008255005, 'timestamp': '2025-09-10 02:26:42.336806', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.365067', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.029047995805740356, 'timestamp': '2025-09-10 02:26:42.388138', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.416710', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.012068702839314938, 'timestamp': '2025-09-10 02:26:42.418482', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.447067', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.074915811419487, 'timestamp': '2025-09-10 02:26:42.448479', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.476557', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.025899339467287064, 'timestamp': '2025-09-10 02:26:42.478115', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.506268', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.027209777384996414, 'timestamp': '2025-09-10 02:26:42.529353', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.558126', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.0507054440677166, 'timestamp': '2025-09-10 02:26:42.559543', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:42.587824', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.06465242058038712, 'timestamp': '2025-09-10 02:26:42.589472', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:42.618335', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.013187861070036888, 'timestamp': '2025-09-10 02:26:42.620011', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.648790', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.039449822157621384, 'timestamp': '2025-09-10 02:26:42.671710', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:42.700993', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.0234070997685194, 'timestamp': '2025-09-10 02:26:42.702553', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.731036', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.05278443172574043, 'timestamp': '2025-09-10 02:26:42.732818', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.761049', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.03800437971949577, 'timestamp': '2025-09-10 02:26:42.762490', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.790796', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.03744731470942497, 'timestamp': '2025-09-10 02:26:42.814055', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.842521', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.020335109904408455, 'timestamp': '2025-09-10 02:26:42.844324', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.872482', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.026498381048440933, 'timestamp': '2025-09-10 02:26:42.873948', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.902204', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.021354462951421738, 'timestamp': '2025-09-10 02:26:42.904079', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.932522', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.023156922310590744, 'timestamp': '2025-09-10 02:26:42.955560', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:42.984199', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.03991435095667839, 'timestamp': '2025-09-10 02:26:42.985766', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:43.014073', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.08322066068649292, 'timestamp': '2025-09-10 02:26:43.015751', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.044092', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.045775383710861206, 'timestamp': '2025-09-10 02:26:43.045490', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.073719', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.04141625016927719, 'timestamp': '2025-09-10 02:26:43.096638', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.125323', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.053145766258239746, 'timestamp': '2025-09-10 02:26:43.126932', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.155397', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.031390417367219925, 'timestamp': '2025-09-10 02:26:43.156779', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.185162', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.028599724173545837, 'timestamp': '2025-09-10 02:26:43.186943', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.215096', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.01685398444533348, 'timestamp': '2025-09-10 02:26:43.238252', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:43.271233', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.01568339765071869, 'timestamp': '2025-09-10 02:26:43.272866', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.301708', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.024174261838197708, 'timestamp': '2025-09-10 02:26:43.303375', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.332073', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.012761001475155354, 'timestamp': '2025-09-10 02:26:43.333506', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.362030', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.029998183250427246, 'timestamp': '2025-09-10 02:26:43.385488', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.414645', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.022816693410277367, 'timestamp': '2025-09-10 02:26:43.416679', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.445258', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.05389130488038063, 'timestamp': '2025-09-10 02:26:43.447204', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.475768', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.02855268493294716, 'timestamp': '2025-09-10 02:26:43.477464', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.505718', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.02061382308602333, 'timestamp': '2025-09-10 02:26:43.529083', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.557476', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.08206148445606232, 'timestamp': '2025-09-10 02:26:43.559541', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.587944', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.02579513192176819, 'timestamp': '2025-09-10 02:26:43.589636', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:43.618052', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.009594475850462914, 'timestamp': '2025-09-10 02:26:43.619915', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:43.648792', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.04478087276220322, 'timestamp': '2025-09-10 02:26:43.671998', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.700559', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.028195369988679886, 'timestamp': '2025-09-10 02:26:43.702105', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.730458', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.04168400168418884, 'timestamp': '2025-09-10 02:26:43.732410', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:43.761456', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.06364897638559341, 'timestamp': '2025-09-10 02:26:43.763142', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.792127', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.03534334525465965, 'timestamp': '2025-09-10 02:26:43.815196', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:43.843839', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.03911682218313217, 'timestamp': '2025-09-10 02:26:43.845380', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:43.874083', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.05347789078950882, 'timestamp': '2025-09-10 02:26:43.876052', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.904725', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.02387906052172184, 'timestamp': '2025-09-10 02:26:43.906387', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:43.935015', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.03277410939335823, 'timestamp': '2025-09-10 02:26:43.958345', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:43.986785', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.014015858992934227, 'timestamp': '2025-09-10 02:26:43.988868', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.017811', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.0363222174346447, 'timestamp': '2025-09-10 02:26:44.019760', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.049975', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.03212037682533264, 'timestamp': '2025-09-10 02:26:44.051786', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.080092', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.036108482629060745, 'timestamp': '2025-09-10 02:26:44.113427', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.147330', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.05427970737218857, 'timestamp': '2025-09-10 02:26:44.148934', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.185849', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.0445655882358551, 'timestamp': '2025-09-10 02:26:44.187633', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.216105', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.013648323714733124, 'timestamp': '2025-09-10 02:26:44.217890', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.246656', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.048125237226486206, 'timestamp': '2025-09-10 02:26:44.272245', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.301237', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.03517252579331398, 'timestamp': '2025-09-10 02:26:44.303187', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.331943', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.026025895029306412, 'timestamp': '2025-09-10 02:26:44.333916', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.367308', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.010249711573123932, 'timestamp': '2025-09-10 02:26:44.368942', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.397354', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.037203043699264526, 'timestamp': '2025-09-10 02:26:44.425016', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:44.453711', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.027322134003043175, 'timestamp': '2025-09-10 02:26:44.455532', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.486534', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.05180314928293228, 'timestamp': '2025-09-10 02:26:44.488365', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.516992', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.04153513163328171, 'timestamp': '2025-09-10 02:26:44.518892', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.547695', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.01963025890290737, 'timestamp': '2025-09-10 02:26:44.575161', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:44.607489', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.028999345377087593, 'timestamp': '2025-09-10 02:26:44.608907', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.637714', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.01062434446066618, 'timestamp': '2025-09-10 02:26:44.639332', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.668319', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.047068577259778976, 'timestamp': '2025-09-10 02:26:44.670027', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.698001', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.018893277272582054, 'timestamp': '2025-09-10 02:26:44.721191', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:44.757206', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.03721915930509567, 'timestamp': '2025-09-10 02:26:44.758884', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.789131', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.02848803997039795, 'timestamp': '2025-09-10 02:26:44.790837', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.819290', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.019638491794466972, 'timestamp': '2025-09-10 02:26:44.821212', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.850107', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.03575807437300682, 'timestamp': '2025-09-10 02:26:44.874463', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.903444', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.027394594624638557, 'timestamp': '2025-09-10 02:26:44.905074', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.933905', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.02229316346347332, 'timestamp': '2025-09-10 02:26:44.935774', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:44.964452', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.02307887002825737, 'timestamp': '2025-09-10 02:26:44.966155', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:44.994886', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.01366735901683569, 'timestamp': '2025-09-10 02:26:45.018222', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.047055', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.046775348484516144, 'timestamp': '2025-09-10 02:26:45.048939', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:45.077744', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.015637096017599106, 'timestamp': '2025-09-10 02:26:45.079685', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.108499', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.015831107273697853, 'timestamp': '2025-09-10 02:26:45.110363', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.139372', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.04828057810664177, 'timestamp': '2025-09-10 02:26:45.162873', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:45.191474', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.039840757846832275, 'timestamp': '2025-09-10 02:26:45.193168', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:45.221727', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.0489020049571991, 'timestamp': '2025-09-10 02:26:45.223404', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.252583', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.008196599781513214, 'timestamp': '2025-09-10 02:26:45.254015', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.282326', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.033972881734371185, 'timestamp': '2025-09-10 02:26:45.305506', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.335052', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.03189618140459061, 'timestamp': '2025-09-10 02:26:45.336930', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:45.365521', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.03479809686541557, 'timestamp': '2025-09-10 02:26:45.367988', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.396696', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.008106947876513004, 'timestamp': '2025-09-10 02:26:45.398696', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:45.427353', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.04672331362962723, 'timestamp': '2025-09-10 02:26:45.450618', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:26:47.291541', 'step': 608, 'epoch': 1} {'type': 'pplx', 'content': 2200986.452202289, 'timestamp': '2025-09-10 02:26:47.293495', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.321364', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.07109908759593964, 'timestamp': '2025-09-10 02:26:47.323033', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:47.351547', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.0423298217356205, 'timestamp': '2025-09-10 02:26:47.352995', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.381791', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.015477584674954414, 'timestamp': '2025-09-10 02:26:47.383254', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.412143', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.010698787868022919, 'timestamp': '2025-09-10 02:26:47.435218', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.463670', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.03782622888684273, 'timestamp': '2025-09-10 02:26:47.465354', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.494574', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.047737814486026764, 'timestamp': '2025-09-10 02:26:47.496084', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.524379', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.009725173935294151, 'timestamp': '2025-09-10 02:26:47.526432', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.555581', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.006624232046306133, 'timestamp': '2025-09-10 02:26:47.579249', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.607571', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.046601273119449615, 'timestamp': '2025-09-10 02:26:47.609253', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.638308', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.031758103519678116, 'timestamp': '2025-09-10 02:26:47.640190', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.668808', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.0243473369628191, 'timestamp': '2025-09-10 02:26:47.670406', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.698825', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.018934383988380432, 'timestamp': '2025-09-10 02:26:47.722157', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:47.751167', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.0067651644349098206, 'timestamp': '2025-09-10 02:26:47.752961', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:47.781712', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.02667141892015934, 'timestamp': '2025-09-10 02:26:47.783431', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:47.811983', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.04350658506155014, 'timestamp': '2025-09-10 02:26:47.813851', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.842399', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.0501113124191761, 'timestamp': '2025-09-10 02:26:47.865832', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.894643', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.061473067849874496, 'timestamp': '2025-09-10 02:26:47.896345', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.924558', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.03871607780456543, 'timestamp': '2025-09-10 02:26:47.926386', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.954495', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.03448708355426788, 'timestamp': '2025-09-10 02:26:47.955903', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:47.983580', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.0213655773550272, 'timestamp': '2025-09-10 02:26:48.006853', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.036037', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.022643353790044785, 'timestamp': '2025-09-10 02:26:48.037597', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.065825', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.04520813003182411, 'timestamp': '2025-09-10 02:26:48.067593', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.096649', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.04208702594041824, 'timestamp': '2025-09-10 02:26:48.098437', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.127257', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.045431990176439285, 'timestamp': '2025-09-10 02:26:48.150642', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.179534', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.01084569375962019, 'timestamp': '2025-09-10 02:26:48.181145', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.209489', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.049830012023448944, 'timestamp': '2025-09-10 02:26:48.211225', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.243221', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.03610464185476303, 'timestamp': '2025-09-10 02:26:48.245009', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.273148', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.028134096413850784, 'timestamp': '2025-09-10 02:26:48.296275', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.324879', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.00924620684236288, 'timestamp': '2025-09-10 02:26:48.326769', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.355328', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.03791581466794014, 'timestamp': '2025-09-10 02:26:48.357050', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.385959', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.02533513866364956, 'timestamp': '2025-09-10 02:26:48.387588', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.416118', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.037173282355070114, 'timestamp': '2025-09-10 02:26:48.439274', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.468541', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.026109185069799423, 'timestamp': '2025-09-10 02:26:48.470084', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.498801', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.019872980192303658, 'timestamp': '2025-09-10 02:26:48.500314', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.528802', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.04102848097681999, 'timestamp': '2025-09-10 02:26:48.530524', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.559413', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.057591069489717484, 'timestamp': '2025-09-10 02:26:48.582748', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.611840', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.06546728312969208, 'timestamp': '2025-09-10 02:26:48.613714', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:48.642318', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.02737610973417759, 'timestamp': '2025-09-10 02:26:48.644304', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.673191', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.026363076642155647, 'timestamp': '2025-09-10 02:26:48.674817', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.703577', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.036277834326028824, 'timestamp': '2025-09-10 02:26:48.726689', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:48.755769', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.03815680742263794, 'timestamp': '2025-09-10 02:26:48.757495', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.785888', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.05288299545645714, 'timestamp': '2025-09-10 02:26:48.787603', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.816012', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.007451296783983707, 'timestamp': '2025-09-10 02:26:48.817826', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:48.846628', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.018843041732907295, 'timestamp': '2025-09-10 02:26:48.869876', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.898911', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.013613715767860413, 'timestamp': '2025-09-10 02:26:48.900363', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.928360', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.06748726963996887, 'timestamp': '2025-09-10 02:26:48.929862', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.957990', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.043075766414403915, 'timestamp': '2025-09-10 02:26:48.959391', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:48.987570', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.042788174003362656, 'timestamp': '2025-09-10 02:26:49.010596', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.039316', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.055226586759090424, 'timestamp': '2025-09-10 02:26:49.040912', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.068828', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.053308092057704926, 'timestamp': '2025-09-10 02:26:49.070686', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.099403', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.028021065518260002, 'timestamp': '2025-09-10 02:26:49.101193', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.129822', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.032105498015880585, 'timestamp': '2025-09-10 02:26:49.153282', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.182065', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.04760170355439186, 'timestamp': '2025-09-10 02:26:49.183890', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.212643', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.02774801477789879, 'timestamp': '2025-09-10 02:26:49.214414', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.242896', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.05581825226545334, 'timestamp': '2025-09-10 02:26:49.244781', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.272715', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.009996329434216022, 'timestamp': '2025-09-10 02:26:49.295831', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.324296', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.015302867628633976, 'timestamp': '2025-09-10 02:26:49.325907', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:49.353981', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.014631493017077446, 'timestamp': '2025-09-10 02:26:49.355922', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.384685', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.03862696886062622, 'timestamp': '2025-09-10 02:26:49.386372', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:49.415127', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.026202142238616943, 'timestamp': '2025-09-10 02:26:49.438485', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.467023', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.03486243262887001, 'timestamp': '2025-09-10 02:26:49.468711', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.497397', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.023656947538256645, 'timestamp': '2025-09-10 02:26:49.498743', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.527268', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.031210143119096756, 'timestamp': '2025-09-10 02:26:49.528829', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.557469', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.007307054009288549, 'timestamp': '2025-09-10 02:26:49.580396', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.609705', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.025393787771463394, 'timestamp': '2025-09-10 02:26:49.611422', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.639948', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.04881792888045311, 'timestamp': '2025-09-10 02:26:49.641661', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.669590', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.006452545057982206, 'timestamp': '2025-09-10 02:26:49.671096', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:49.699870', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.0486336275935173, 'timestamp': '2025-09-10 02:26:49.723222', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.751719', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.06550680845975876, 'timestamp': '2025-09-10 02:26:49.753426', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.782394', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.058530163019895554, 'timestamp': '2025-09-10 02:26:49.784240', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.813042', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.0352046824991703, 'timestamp': '2025-09-10 02:26:49.814771', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.843133', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.020520886406302452, 'timestamp': '2025-09-10 02:26:49.866393', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.894804', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.011987773701548576, 'timestamp': '2025-09-10 02:26:49.896333', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.924330', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.023767616599798203, 'timestamp': '2025-09-10 02:26:49.926027', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.955016', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.021541643887758255, 'timestamp': '2025-09-10 02:26:49.956569', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:49.984250', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.015927335247397423, 'timestamp': '2025-09-10 02:26:50.007246', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.035920', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.010418176651000977, 'timestamp': '2025-09-10 02:26:50.037275', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.065636', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.0358634777367115, 'timestamp': '2025-09-10 02:26:50.067184', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.095420', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.03563470393419266, 'timestamp': '2025-09-10 02:26:50.096846', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.125020', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.024202287197113037, 'timestamp': '2025-09-10 02:26:50.148289', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.176645', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.0377209298312664, 'timestamp': '2025-09-10 02:26:50.178424', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.206812', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.020676741376519203, 'timestamp': '2025-09-10 02:26:50.208292', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.236682', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.06705940514802933, 'timestamp': '2025-09-10 02:26:50.238295', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.266571', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.0945725068449974, 'timestamp': '2025-09-10 02:26:50.289528', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.317965', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.033295098692178726, 'timestamp': '2025-09-10 02:26:50.319377', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:50.347257', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.037053436040878296, 'timestamp': '2025-09-10 02:26:50.348812', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.376989', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.021943015977740288, 'timestamp': '2025-09-10 02:26:50.378569', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.406669', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.026456695050001144, 'timestamp': '2025-09-10 02:26:50.429853', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.457872', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.03228648751974106, 'timestamp': '2025-09-10 02:26:50.459470', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.487012', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.023058131337165833, 'timestamp': '2025-09-10 02:26:50.488465', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.516163', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.030382486060261726, 'timestamp': '2025-09-10 02:26:50.517577', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.545948', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.02017112448811531, 'timestamp': '2025-09-10 02:26:50.569146', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.597610', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.016261307522654533, 'timestamp': '2025-09-10 02:26:50.599127', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.628167', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.04979781433939934, 'timestamp': '2025-09-10 02:26:50.629878', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:50.658549', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.03741488605737686, 'timestamp': '2025-09-10 02:26:50.660198', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:50.688713', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.032305363565683365, 'timestamp': '2025-09-10 02:26:50.711837', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.741072', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.008190718479454517, 'timestamp': '2025-09-10 02:26:50.742721', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.770795', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.03497549146413803, 'timestamp': '2025-09-10 02:26:50.772293', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.800601', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.032218027859926224, 'timestamp': '2025-09-10 02:26:50.801985', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.830096', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.02115151286125183, 'timestamp': '2025-09-10 02:26:50.853368', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:50.881547', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.03463287279009819, 'timestamp': '2025-09-10 02:26:50.883151', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.911659', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.0242976825684309, 'timestamp': '2025-09-10 02:26:50.913106', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.941378', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.0387067086994648, 'timestamp': '2025-09-10 02:26:50.943066', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:50.971672', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.0348440445959568, 'timestamp': '2025-09-10 02:26:50.994804', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.023788', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.06683380901813507, 'timestamp': '2025-09-10 02:26:51.025261', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.053725', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.020580554381012917, 'timestamp': '2025-09-10 02:26:51.055205', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.083768', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.0237856637686491, 'timestamp': '2025-09-10 02:26:51.085241', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.113530', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.013393201865255833, 'timestamp': '2025-09-10 02:26:51.136848', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.165110', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.037130821496248245, 'timestamp': '2025-09-10 02:26:51.166682', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.194854', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.03772103041410446, 'timestamp': '2025-09-10 02:26:51.196396', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.224736', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.012475842610001564, 'timestamp': '2025-09-10 02:26:51.226341', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:51.255110', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.03414268419146538, 'timestamp': '2025-09-10 02:26:51.278112', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:51.306978', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.05466434359550476, 'timestamp': '2025-09-10 02:26:51.308617', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.337372', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.04362777993083, 'timestamp': '2025-09-10 02:26:51.339113', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:51.367864', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.028265880420804024, 'timestamp': '2025-09-10 02:26:51.369365', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.397803', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.007118005305528641, 'timestamp': '2025-09-10 02:26:51.420943', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.449569', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.03422413393855095, 'timestamp': '2025-09-10 02:26:51.451038', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.479143', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.03792322799563408, 'timestamp': '2025-09-10 02:26:51.480799', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.508998', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.04148111492395401, 'timestamp': '2025-09-10 02:26:51.510379', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.538681', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.05498867854475975, 'timestamp': '2025-09-10 02:26:51.561876', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.590170', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.04803735017776489, 'timestamp': '2025-09-10 02:26:51.591844', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.620032', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.02832743152976036, 'timestamp': '2025-09-10 02:26:51.621455', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.649972', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.02841213345527649, 'timestamp': '2025-09-10 02:26:51.651532', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.679910', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.035199183970689774, 'timestamp': '2025-09-10 02:26:51.702997', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.732103', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.03662240505218506, 'timestamp': '2025-09-10 02:26:51.733618', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.761974', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.01077954936772585, 'timestamp': '2025-09-10 02:26:51.763242', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.791632', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.02980462647974491, 'timestamp': '2025-09-10 02:26:51.793223', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.822116', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.04685065522789955, 'timestamp': '2025-09-10 02:26:51.845259', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.873383', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.02059471607208252, 'timestamp': '2025-09-10 02:26:51.875008', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:51.903465', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.016841420903801918, 'timestamp': '2025-09-10 02:26:51.905080', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.933906', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.022859051823616028, 'timestamp': '2025-09-10 02:26:51.935583', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:51.964162', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.015584629960358143, 'timestamp': '2025-09-10 02:26:51.987156', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.015724', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.0430554524064064, 'timestamp': '2025-09-10 02:26:52.017231', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.045908', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.05952511355280876, 'timestamp': '2025-09-10 02:26:52.047394', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.075681', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.0038539913948625326, 'timestamp': '2025-09-10 02:26:52.077220', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.105938', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.029965851455926895, 'timestamp': '2025-09-10 02:26:52.128888', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.157289', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.07923837006092072, 'timestamp': '2025-09-10 02:26:52.158999', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.188457', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.01898501254618168, 'timestamp': '2025-09-10 02:26:52.190365', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.219837', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.02514803595840931, 'timestamp': '2025-09-10 02:26:52.221387', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.250204', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.04136563092470169, 'timestamp': '2025-09-10 02:26:52.273373', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.301589', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.03028266690671444, 'timestamp': '2025-09-10 02:26:52.303179', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:52.331645', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.04092634096741676, 'timestamp': '2025-09-10 02:26:52.332998', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.361165', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.06992150843143463, 'timestamp': '2025-09-10 02:26:52.362733', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.390625', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.015275141224265099, 'timestamp': '2025-09-10 02:26:52.413551', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.442333', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.01683962717652321, 'timestamp': '2025-09-10 02:26:52.443854', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.472139', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.035905104130506516, 'timestamp': '2025-09-10 02:26:52.473531', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.501697', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.04694758728146553, 'timestamp': '2025-09-10 02:26:52.503196', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.531662', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.029720274731516838, 'timestamp': '2025-09-10 02:26:52.554611', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.583282', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.02850966341793537, 'timestamp': '2025-09-10 02:26:52.584859', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.612987', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.07100068032741547, 'timestamp': '2025-09-10 02:26:52.614646', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.643105', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.008930629119277, 'timestamp': '2025-09-10 02:26:52.644591', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:52.673084', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.05873224884271622, 'timestamp': '2025-09-10 02:26:52.696219', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:26:54.548540', 'step': 760, 'epoch': 1} {'type': 'pplx', 'content': 2503157.170217035, 'timestamp': '2025-09-10 02:26:54.550020', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.577915', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.020794304087758064, 'timestamp': '2025-09-10 02:26:54.579372', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.608202', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.02302137203514576, 'timestamp': '2025-09-10 02:26:54.609839', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:54.638283', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.049961064010858536, 'timestamp': '2025-09-10 02:26:54.639681', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.667812', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.07963868230581284, 'timestamp': '2025-09-10 02:26:54.690824', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.719395', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.03662387281656265, 'timestamp': '2025-09-10 02:26:54.720937', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.749291', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.043657511472702026, 'timestamp': '2025-09-10 02:26:54.750756', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.779230', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.03749134764075279, 'timestamp': '2025-09-10 02:26:54.780610', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.808711', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.025386493653059006, 'timestamp': '2025-09-10 02:26:54.831874', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.860558', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.01552271656692028, 'timestamp': '2025-09-10 02:26:54.862021', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.890902', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.036365240812301636, 'timestamp': '2025-09-10 02:26:54.892427', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.921241', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.008484335616230965, 'timestamp': '2025-09-10 02:26:54.922839', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:54.951076', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.04429873079061508, 'timestamp': '2025-09-10 02:26:54.974195', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.002711', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.02399025857448578, 'timestamp': '2025-09-10 02:26:55.004149', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.032233', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.036831699311733246, 'timestamp': '2025-09-10 02:26:55.033645', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.061729', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.019087400287389755, 'timestamp': '2025-09-10 02:26:55.063295', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.091778', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.009040270932018757, 'timestamp': '2025-09-10 02:26:55.114634', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.143205', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.015383010730147362, 'timestamp': '2025-09-10 02:26:55.144759', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:55.172903', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.022596066817641258, 'timestamp': '2025-09-10 02:26:55.174376', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.203278', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.08443605154752731, 'timestamp': '2025-09-10 02:26:55.204576', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.232936', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.01622471585869789, 'timestamp': '2025-09-10 02:26:55.255979', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.284729', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.041626691818237305, 'timestamp': '2025-09-10 02:26:55.286156', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.314236', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.0428607352077961, 'timestamp': '2025-09-10 02:26:55.315703', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.343806', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.03353489935398102, 'timestamp': '2025-09-10 02:26:55.345389', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.374106', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.013459078967571259, 'timestamp': '2025-09-10 02:26:55.397388', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.426018', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.014677911065518856, 'timestamp': '2025-09-10 02:26:55.427567', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.455690', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.0209831353276968, 'timestamp': '2025-09-10 02:26:55.456974', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.484965', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.036396000534296036, 'timestamp': '2025-09-10 02:26:55.486602', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.514945', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.03313400223851204, 'timestamp': '2025-09-10 02:26:55.537842', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.566416', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.022417547181248665, 'timestamp': '2025-09-10 02:26:55.567848', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.596416', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.016005048528313637, 'timestamp': '2025-09-10 02:26:55.597973', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.626100', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.049527037888765335, 'timestamp': '2025-09-10 02:26:55.627650', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.655576', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.04059341177344322, 'timestamp': '2025-09-10 02:26:55.678702', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.707378', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.04651249572634697, 'timestamp': '2025-09-10 02:26:55.708942', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.737621', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.04483857750892639, 'timestamp': '2025-09-10 02:26:55.739355', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.770030', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.007778738159686327, 'timestamp': '2025-09-10 02:26:55.771432', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.800335', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.011393926106393337, 'timestamp': '2025-09-10 02:26:55.823834', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.853266', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.02515154890716076, 'timestamp': '2025-09-10 02:26:55.854858', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.883187', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.034160640090703964, 'timestamp': '2025-09-10 02:26:55.884661', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:55.913163', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.010150982066988945, 'timestamp': '2025-09-10 02:26:55.914951', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.943338', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.013213365338742733, 'timestamp': '2025-09-10 02:26:55.966528', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:55.995288', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.005620269570499659, 'timestamp': '2025-09-10 02:26:55.996766', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.025235', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.0072153410874307156, 'timestamp': '2025-09-10 02:26:56.026799', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:56.054985', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.026846928521990776, 'timestamp': '2025-09-10 02:26:56.056759', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.084982', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.04960143193602562, 'timestamp': '2025-09-10 02:26:56.107994', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.136673', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.0130831403657794, 'timestamp': '2025-09-10 02:26:56.138023', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:56.166808', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.025565098971128464, 'timestamp': '2025-09-10 02:26:56.168815', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.197806', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.027919035404920578, 'timestamp': '2025-09-10 02:26:56.199264', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.227908', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.05793684348464012, 'timestamp': '2025-09-10 02:26:56.251174', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.279689', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.04225584864616394, 'timestamp': '2025-09-10 02:26:56.281236', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.311362', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.01058135274797678, 'timestamp': '2025-09-10 02:26:56.313019', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.342087', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.0357813760638237, 'timestamp': '2025-09-10 02:26:56.344028', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.373216', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.025642748922109604, 'timestamp': '2025-09-10 02:26:56.396589', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.425545', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.024076486006379128, 'timestamp': '2025-09-10 02:26:56.427250', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.455943', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.026659991592168808, 'timestamp': '2025-09-10 02:26:56.457961', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.486793', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.005625136662274599, 'timestamp': '2025-09-10 02:26:56.489885', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:56.521733', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.05507013946771622, 'timestamp': '2025-09-10 02:26:56.545080', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.574042', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.04457852989435196, 'timestamp': '2025-09-10 02:26:56.575662', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.603926', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.01740320399403572, 'timestamp': '2025-09-10 02:26:56.605442', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.633790', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.011634668335318565, 'timestamp': '2025-09-10 02:26:56.635563', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.664053', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.0116079431027174, 'timestamp': '2025-09-10 02:26:56.687293', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.715854', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.035347651690244675, 'timestamp': '2025-09-10 02:26:56.717193', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:56.745555', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.0406646691262722, 'timestamp': '2025-09-10 02:26:56.746890', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.775320', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.003157601458951831, 'timestamp': '2025-09-10 02:26:56.776707', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.805168', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.0253426693379879, 'timestamp': '2025-09-10 02:26:56.828211', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.856754', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.03272155672311783, 'timestamp': '2025-09-10 02:26:56.858472', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:56.887504', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.03958987444639206, 'timestamp': '2025-09-10 02:26:56.889192', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:56.918050', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.019212845712900162, 'timestamp': '2025-09-10 02:26:56.919780', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:56.948559', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.007750155869871378, 'timestamp': '2025-09-10 02:26:56.971964', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:57.000895', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.046940695494413376, 'timestamp': '2025-09-10 02:26:57.002562', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.031079', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.06961827725172043, 'timestamp': '2025-09-10 02:26:57.032686', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.060940', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.04929071292281151, 'timestamp': '2025-09-10 02:26:57.062552', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.090902', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.028989458456635475, 'timestamp': '2025-09-10 02:26:57.114201', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.142375', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.06238769739866257, 'timestamp': '2025-09-10 02:26:57.143794', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.172558', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.032351333647966385, 'timestamp': '2025-09-10 02:26:57.173915', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.202245', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.05260155349969864, 'timestamp': '2025-09-10 02:26:57.203623', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.231949', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.07337234914302826, 'timestamp': '2025-09-10 02:26:57.254968', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.283641', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.02990635298192501, 'timestamp': '2025-09-10 02:26:57.284964', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.313134', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.011495047248899937, 'timestamp': '2025-09-10 02:26:57.314780', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.344778', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.07979269325733185, 'timestamp': '2025-09-10 02:26:57.346378', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.375843', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.007071019150316715, 'timestamp': '2025-09-10 02:26:57.399071', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.427834', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.028329063206911087, 'timestamp': '2025-09-10 02:26:57.429384', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.457911', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.0028442800976336002, 'timestamp': '2025-09-10 02:26:57.459590', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.488580', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.004879837390035391, 'timestamp': '2025-09-10 02:26:57.490514', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.519550', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.03970589488744736, 'timestamp': '2025-09-10 02:26:57.542861', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:26:57.572161', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.011552110314369202, 'timestamp': '2025-09-10 02:26:57.573779', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.602524', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.005572037305682898, 'timestamp': '2025-09-10 02:26:57.604230', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.632628', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.024660101160407066, 'timestamp': '2025-09-10 02:26:57.634239', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.662928', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.010990379378199577, 'timestamp': '2025-09-10 02:26:57.686262', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.715047', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.02281826175749302, 'timestamp': '2025-09-10 02:26:57.716448', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.744947', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.05251472443342209, 'timestamp': '2025-09-10 02:26:57.746690', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.775116', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.001956451218575239, 'timestamp': '2025-09-10 02:26:57.776778', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.805487', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.03003103658556938, 'timestamp': '2025-09-10 02:26:57.828694', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:57.857641', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.038373447954654694, 'timestamp': '2025-09-10 02:26:57.859206', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.887907', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.017338624224066734, 'timestamp': '2025-09-10 02:26:57.889383', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.917855', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.028965888544917107, 'timestamp': '2025-09-10 02:26:57.919217', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:57.947750', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.01129152625799179, 'timestamp': '2025-09-10 02:26:57.971105', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:57.999907', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.018316298723220825, 'timestamp': '2025-09-10 02:26:58.001399', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.029631', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.040882717818021774, 'timestamp': '2025-09-10 02:26:58.031262', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.059571', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.04055514559149742, 'timestamp': '2025-09-10 02:26:58.061167', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:58.089540', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.08326940983533859, 'timestamp': '2025-09-10 02:26:58.112736', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.141039', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.04865090176463127, 'timestamp': '2025-09-10 02:26:58.142449', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.171250', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.016379257664084435, 'timestamp': '2025-09-10 02:26:58.172849', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.201208', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.07710827142000198, 'timestamp': '2025-09-10 02:26:58.202791', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.230971', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.027466127648949623, 'timestamp': '2025-09-10 02:26:58.254276', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.282593', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.03628736361861229, 'timestamp': '2025-09-10 02:26:58.284275', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.313466', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.026968399062752724, 'timestamp': '2025-09-10 02:26:58.315050', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.343378', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.041775938123464584, 'timestamp': '2025-09-10 02:26:58.345033', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.373602', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.047613587230443954, 'timestamp': '2025-09-10 02:26:58.396737', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.425338', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.05883047729730606, 'timestamp': '2025-09-10 02:26:58.426964', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.455404', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.06252393126487732, 'timestamp': '2025-09-10 02:26:58.456885', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.484735', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.015127004124224186, 'timestamp': '2025-09-10 02:26:58.485914', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.514780', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.01448144856840372, 'timestamp': '2025-09-10 02:26:58.539561', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:26:58.572959', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.06627927720546722, 'timestamp': '2025-09-10 02:26:58.574374', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.603531', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.019005369395017624, 'timestamp': '2025-09-10 02:26:58.605178', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.634262', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.02605721540749073, 'timestamp': '2025-09-10 02:26:58.636434', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.664789', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.03991827368736267, 'timestamp': '2025-09-10 02:26:58.688019', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:58.716716', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.03819936141371727, 'timestamp': '2025-09-10 02:26:58.718837', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.747422', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.0238080732524395, 'timestamp': '2025-09-10 02:26:58.749301', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.778063', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.023295262828469276, 'timestamp': '2025-09-10 02:26:58.782160', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.814474', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.043232087045907974, 'timestamp': '2025-09-10 02:26:58.837418', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.866795', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.03321755677461624, 'timestamp': '2025-09-10 02:26:58.867959', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.918272', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.03598690778017044, 'timestamp': '2025-09-10 02:26:58.919779', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.959653', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.032034873962402344, 'timestamp': '2025-09-10 02:26:58.961079', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:58.989528', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.024193478748202324, 'timestamp': '2025-09-10 02:26:59.019158', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.048228', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.02453945204615593, 'timestamp': '2025-09-10 02:26:59.049847', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.078136', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.021976953372359276, 'timestamp': '2025-09-10 02:26:59.079740', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.108089', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.029496390372514725, 'timestamp': '2025-09-10 02:26:59.109965', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.138397', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.04946603253483772, 'timestamp': '2025-09-10 02:26:59.161913', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.190814', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.05130714550614357, 'timestamp': '2025-09-10 02:26:59.192630', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.229788', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.008475149050354958, 'timestamp': '2025-09-10 02:26:59.231398', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:26:59.279503', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.06713636219501495, 'timestamp': '2025-09-10 02:26:59.281228', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.309801', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.010133733041584492, 'timestamp': '2025-09-10 02:26:59.333035', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.361789', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.016804341226816177, 'timestamp': '2025-09-10 02:26:59.363385', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.391648', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.04320318624377251, 'timestamp': '2025-09-10 02:26:59.393321', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.421898', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.05202284827828407, 'timestamp': '2025-09-10 02:26:59.423531', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.456764', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.039947301149368286, 'timestamp': '2025-09-10 02:26:59.480192', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.508979', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.04276236519217491, 'timestamp': '2025-09-10 02:26:59.510718', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:59.538986', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.05527576431632042, 'timestamp': '2025-09-10 02:26:59.540380', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.568843', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.04893497750163078, 'timestamp': '2025-09-10 02:26:59.570093', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.598704', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.036729227751493454, 'timestamp': '2025-09-10 02:26:59.621797', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.650523', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.013712167739868164, 'timestamp': '2025-09-10 02:26:59.652162', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.680523', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.04306911304593086, 'timestamp': '2025-09-10 02:26:59.682064', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.710335', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.027140114456415176, 'timestamp': '2025-09-10 02:26:59.711924', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.740712', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.020640883594751358, 'timestamp': '2025-09-10 02:26:59.763980', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.792590', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.022836310788989067, 'timestamp': '2025-09-10 02:26:59.794209', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.823280', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.03370488062500954, 'timestamp': '2025-09-10 02:26:59.824985', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.853309', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.04225897043943405, 'timestamp': '2025-09-10 02:26:59.855144', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.883893', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.008355808444321156, 'timestamp': '2025-09-10 02:26:59.907023', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.935479', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.04681595414876938, 'timestamp': '2025-09-10 02:26:59.937251', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:26:59.965718', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.03790203854441643, 'timestamp': '2025-09-10 02:26:59.967415', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:26:59.996145', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.0279624555259943, 'timestamp': '2025-09-10 02:26:59.997857', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:00.026413', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.01219746470451355, 'timestamp': '2025-09-10 02:27:00.049618', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:27:01.874939', 'step': 912, 'epoch': 1} {'type': 'pplx', 'content': 2265473.919981855, 'timestamp': '2025-09-10 02:27:01.876535', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.904830', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.03966560214757919, 'timestamp': '2025-09-10 02:27:01.906208', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:01.934727', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.03247629851102829, 'timestamp': '2025-09-10 02:27:01.936280', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:01.964790', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.025539681315422058, 'timestamp': '2025-09-10 02:27:01.966146', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:01.994786', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.03211888670921326, 'timestamp': '2025-09-10 02:27:02.017800', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.046792', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.012743504717946053, 'timestamp': '2025-09-10 02:27:02.048254', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.076895', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.03760813549160957, 'timestamp': '2025-09-10 02:27:02.078501', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.108084', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.015784770250320435, 'timestamp': '2025-09-10 02:27:02.109666', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.138145', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.044560663402080536, 'timestamp': '2025-09-10 02:27:02.161404', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.190162', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.04900949448347092, 'timestamp': '2025-09-10 02:27:02.191794', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.219935', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.018750814720988274, 'timestamp': '2025-09-10 02:27:02.221506', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.249736', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.04206148535013199, 'timestamp': '2025-09-10 02:27:02.251491', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:02.280172', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.021803708747029305, 'timestamp': '2025-09-10 02:27:02.303488', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:02.332263', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.033080197870731354, 'timestamp': '2025-09-10 02:27:02.334070', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.362784', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.014295650646090508, 'timestamp': '2025-09-10 02:27:02.364554', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.392983', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.042352236807346344, 'timestamp': '2025-09-10 02:27:02.394776', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.423097', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.04907870292663574, 'timestamp': '2025-09-10 02:27:02.446331', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.474628', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.038371481001377106, 'timestamp': '2025-09-10 02:27:02.477096', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.508432', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.049601972103118896, 'timestamp': '2025-09-10 02:27:02.510210', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.538846', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.012743870727717876, 'timestamp': '2025-09-10 02:27:02.540460', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.568749', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.06787528842687607, 'timestamp': '2025-09-10 02:27:02.591908', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.620686', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.02233758009970188, 'timestamp': '2025-09-10 02:27:02.622124', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.650189', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.023858215659856796, 'timestamp': '2025-09-10 02:27:02.651791', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.680110', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.03377927467226982, 'timestamp': '2025-09-10 02:27:02.681605', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.710383', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.05412615090608597, 'timestamp': '2025-09-10 02:27:02.733453', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.761793', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.032133154571056366, 'timestamp': '2025-09-10 02:27:02.763141', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.791620', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.04311128705739975, 'timestamp': '2025-09-10 02:27:02.792882', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.820751', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.03060062788426876, 'timestamp': '2025-09-10 02:27:02.822271', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.850794', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.031108910217881203, 'timestamp': '2025-09-10 02:27:02.873909', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.902147', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.02943595126271248, 'timestamp': '2025-09-10 02:27:02.903480', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.931606', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.022589508444070816, 'timestamp': '2025-09-10 02:27:02.933020', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.960839', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.04562641307711601, 'timestamp': '2025-09-10 02:27:02.962313', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:02.990669', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.016814980655908585, 'timestamp': '2025-09-10 02:27:03.013643', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.041835', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.017725344747304916, 'timestamp': '2025-09-10 02:27:03.043327', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:03.071660', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.014994575642049313, 'timestamp': '2025-09-10 02:27:03.073009', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:03.101297', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.03003416769206524, 'timestamp': '2025-09-10 02:27:03.102795', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:03.131022', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.055385202169418335, 'timestamp': '2025-09-10 02:27:03.154195', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:03.182780', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.025065213441848755, 'timestamp': '2025-09-10 02:27:03.184270', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.213047', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.025820758193731308, 'timestamp': '2025-09-10 02:27:03.214556', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.243108', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.05613445118069649, 'timestamp': '2025-09-10 02:27:03.244487', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:03.273004', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.0411837138235569, 'timestamp': '2025-09-10 02:27:03.295895', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.324441', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.011896687559783459, 'timestamp': '2025-09-10 02:27:03.326055', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.354239', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.0059830015525221825, 'timestamp': '2025-09-10 02:27:03.355837', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.384199', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.03133850172162056, 'timestamp': '2025-09-10 02:27:03.385963', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.413976', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.02864186279475689, 'timestamp': '2025-09-10 02:27:03.436719', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.465165', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.0447709895670414, 'timestamp': '2025-09-10 02:27:03.466830', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.494904', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.031331706792116165, 'timestamp': '2025-09-10 02:27:03.496441', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.524983', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.023064518347382545, 'timestamp': '2025-09-10 02:27:03.527064', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.555224', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.016874434426426888, 'timestamp': '2025-09-10 02:27:03.578102', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:03.607090', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.01792900077998638, 'timestamp': '2025-09-10 02:27:03.608488', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.637568', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.042464207857847214, 'timestamp': '2025-09-10 02:27:03.639224', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.668193', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.018629251047968864, 'timestamp': '2025-09-10 02:27:03.669950', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.699151', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.03737715631723404, 'timestamp': '2025-09-10 02:27:03.722447', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.752221', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.06381294131278992, 'timestamp': '2025-09-10 02:27:03.753951', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.782466', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.029692605137825012, 'timestamp': '2025-09-10 02:27:03.784347', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:03.813007', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.04313949868083, 'timestamp': '2025-09-10 02:27:03.814711', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.843096', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.016904447227716446, 'timestamp': '2025-09-10 02:27:03.866313', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.895093', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.0268878061324358, 'timestamp': '2025-09-10 02:27:03.896895', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.925440', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.012326344847679138, 'timestamp': '2025-09-10 02:27:03.927384', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:03.955801', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.025940924882888794, 'timestamp': '2025-09-10 02:27:03.957524', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:03.986154', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.015164701268076897, 'timestamp': '2025-09-10 02:27:04.009311', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.037614', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.10002827644348145, 'timestamp': '2025-09-10 02:27:04.039339', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.068016', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.040858518332242966, 'timestamp': '2025-09-10 02:27:04.069827', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.098206', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.016241848468780518, 'timestamp': '2025-09-10 02:27:04.099834', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.128428', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.07131864875555038, 'timestamp': '2025-09-10 02:27:04.151640', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.180893', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.03664417937397957, 'timestamp': '2025-09-10 02:27:04.182549', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.210595', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.050497375428676605, 'timestamp': '2025-09-10 02:27:04.212159', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:04.240365', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.017245125025510788, 'timestamp': '2025-09-10 02:27:04.242231', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.270994', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.04215339943766594, 'timestamp': '2025-09-10 02:27:04.294105', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.322785', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.003192092990502715, 'timestamp': '2025-09-10 02:27:04.324488', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.353422', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.02548823133111, 'timestamp': '2025-09-10 02:27:04.355231', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.384531', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.017364230006933212, 'timestamp': '2025-09-10 02:27:04.386432', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.415285', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.024783683940768242, 'timestamp': '2025-09-10 02:27:04.438638', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.467099', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.03101974166929722, 'timestamp': '2025-09-10 02:27:04.468706', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:04.497053', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.043494176119565964, 'timestamp': '2025-09-10 02:27:04.498665', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.526859', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.010910848155617714, 'timestamp': '2025-09-10 02:27:04.528682', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.558528', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.025911325588822365, 'timestamp': '2025-09-10 02:27:04.581796', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.610727', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.0578092560172081, 'timestamp': '2025-09-10 02:27:04.612286', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.641439', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.04395212605595589, 'timestamp': '2025-09-10 02:27:04.643155', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:04.672103', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.015900081023573875, 'timestamp': '2025-09-10 02:27:04.673889', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:04.702703', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.015407932922244072, 'timestamp': '2025-09-10 02:27:04.725898', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.754367', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.07063453644514084, 'timestamp': '2025-09-10 02:27:04.755996', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.784303', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.031969036906957626, 'timestamp': '2025-09-10 02:27:04.785958', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.814521', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.028009014204144478, 'timestamp': '2025-09-10 02:27:04.816387', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:04.845057', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.0059646558947861195, 'timestamp': '2025-09-10 02:27:04.868182', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.897044', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.06651411205530167, 'timestamp': '2025-09-10 02:27:04.898656', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.926954', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.006047925911843777, 'timestamp': '2025-09-10 02:27:04.928879', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:04.957467', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.04073864221572876, 'timestamp': '2025-09-10 02:27:04.959268', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:04.988198', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.02032596431672573, 'timestamp': '2025-09-10 02:27:05.011403', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-10 02:27:09.409925', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.446363', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.03654515743255615, 'timestamp': '2025-09-10 02:27:09.448213', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.477474', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.047440074384212494, 'timestamp': '2025-09-10 02:27:09.479215', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.507798', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.04047483578324318, 'timestamp': '2025-09-10 02:27:09.509390', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.538484', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.03700224682688713, 'timestamp': '2025-09-10 02:27:09.561972', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.590498', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.04263563081622124, 'timestamp': '2025-09-10 02:27:09.592363', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.620779', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.035001423209905624, 'timestamp': '2025-09-10 02:27:09.622665', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.651589', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.013720368035137653, 'timestamp': '2025-09-10 02:27:09.653375', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.682237', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.024205489084124565, 'timestamp': '2025-09-10 02:27:09.705612', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.733682', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.043926943093538284, 'timestamp': '2025-09-10 02:27:09.735479', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.764005', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.04182643070816994, 'timestamp': '2025-09-10 02:27:09.765809', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.793970', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.034336067736148834, 'timestamp': '2025-09-10 02:27:09.795569', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.824126', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.026709143072366714, 'timestamp': '2025-09-10 02:27:09.847448', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.875869', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.04808344691991806, 'timestamp': '2025-09-10 02:27:09.878106', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:09.907276', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.028084395453333855, 'timestamp': '2025-09-10 02:27:09.908860', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.937127', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.028582213446497917, 'timestamp': '2025-09-10 02:27:09.938786', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:09.967034', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.04544654116034508, 'timestamp': '2025-09-10 02:27:09.990038', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.018453', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.07535956799983978, 'timestamp': '2025-09-10 02:27:10.020201', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.048706', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.023995866999030113, 'timestamp': '2025-09-10 02:27:10.050522', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.078807', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.017657050862908363, 'timestamp': '2025-09-10 02:27:10.080466', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:10.108715', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.028995100408792496, 'timestamp': '2025-09-10 02:27:10.131780', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.160448', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.020763801410794258, 'timestamp': '2025-09-10 02:27:10.162146', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.190624', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.0394793376326561, 'timestamp': '2025-09-10 02:27:10.192285', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.221645', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.0334097184240818, 'timestamp': '2025-09-10 02:27:10.223224', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.252436', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.021310362964868546, 'timestamp': '2025-09-10 02:27:10.276281', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.305614', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.028816591948270798, 'timestamp': '2025-09-10 02:27:10.307429', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.336526', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.03184013068675995, 'timestamp': '2025-09-10 02:27:10.338348', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:10.367897', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.024549782276153564, 'timestamp': '2025-09-10 02:27:10.369674', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.398144', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.020799925550818443, 'timestamp': '2025-09-10 02:27:10.421521', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.450482', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.02477448806166649, 'timestamp': '2025-09-10 02:27:10.452148', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.480617', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.0107790632173419, 'timestamp': '2025-09-10 02:27:10.482491', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.511187', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.02755180187523365, 'timestamp': '2025-09-10 02:27:10.512835', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.541499', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.011883458122611046, 'timestamp': '2025-09-10 02:27:10.564836', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:10.593478', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.050424523651599884, 'timestamp': '2025-09-10 02:27:10.595216', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.624006', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.03125917539000511, 'timestamp': '2025-09-10 02:27:10.625942', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.654356', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.028955284506082535, 'timestamp': '2025-09-10 02:27:10.656204', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.684559', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.010720565915107727, 'timestamp': '2025-09-10 02:27:10.707786', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.736276', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.05103132128715515, 'timestamp': '2025-09-10 02:27:10.738069', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:10.766381', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.03147753328084946, 'timestamp': '2025-09-10 02:27:10.768215', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.796896', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.03331483528017998, 'timestamp': '2025-09-10 02:27:10.798500', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.827959', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.03740553930401802, 'timestamp': '2025-09-10 02:27:10.850967', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.879672', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.0681772381067276, 'timestamp': '2025-09-10 02:27:10.881578', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:10.910105', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.048164743930101395, 'timestamp': '2025-09-10 02:27:10.911982', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.940791', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.0528777651488781, 'timestamp': '2025-09-10 02:27:10.945610', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:10.974158', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.01392888743430376, 'timestamp': '2025-09-10 02:27:10.997352', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.026347', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.02998768724501133, 'timestamp': '2025-09-10 02:27:11.028158', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.057119', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.01582830585539341, 'timestamp': '2025-09-10 02:27:11.058773', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.086986', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.01603604108095169, 'timestamp': '2025-09-10 02:27:11.088891', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.117302', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.049586307257413864, 'timestamp': '2025-09-10 02:27:11.140361', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.168794', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.03988540172576904, 'timestamp': '2025-09-10 02:27:11.170612', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.199250', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.016875585541129112, 'timestamp': '2025-09-10 02:27:11.201082', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:11.229675', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.041746437549591064, 'timestamp': '2025-09-10 02:27:11.231377', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.260020', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.04784726724028587, 'timestamp': '2025-09-10 02:27:11.284225', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.312594', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.016732031479477882, 'timestamp': '2025-09-10 02:27:11.314192', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:11.343012', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.020166723057627678, 'timestamp': '2025-09-10 02:27:11.344822', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.373705', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.08511082828044891, 'timestamp': '2025-09-10 02:27:11.375430', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.405111', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.017505383118987083, 'timestamp': '2025-09-10 02:27:11.428428', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.457317', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.03421511873602867, 'timestamp': '2025-09-10 02:27:11.459201', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.487782', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.05192688852548599, 'timestamp': '2025-09-10 02:27:11.489481', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.518000', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.0304196048527956, 'timestamp': '2025-09-10 02:27:11.519805', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.548572', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.04589404910802841, 'timestamp': '2025-09-10 02:27:11.571889', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.600483', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.03423125296831131, 'timestamp': '2025-09-10 02:27:11.602102', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.630912', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.0522378571331501, 'timestamp': '2025-09-10 02:27:11.632565', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.660789', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.061813417822122574, 'timestamp': '2025-09-10 02:27:11.662410', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:11.690942', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.026628315448760986, 'timestamp': '2025-09-10 02:27:11.714067', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:27:13.561358', 'step': 1064, 'epoch': 1} {'type': 'pplx', 'content': 2371722.7542119753, 'timestamp': '2025-09-10 02:27:13.563301', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.590792', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.03160058334469795, 'timestamp': '2025-09-10 02:27:13.592574', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.621158', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.04315807297825813, 'timestamp': '2025-09-10 02:27:13.623062', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.651717', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.01032306905835867, 'timestamp': '2025-09-10 02:27:13.653395', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.682021', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.029713327065110207, 'timestamp': '2025-09-10 02:27:13.705471', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.734395', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.001077897846698761, 'timestamp': '2025-09-10 02:27:13.736210', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.765096', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.009043855592608452, 'timestamp': '2025-09-10 02:27:13.766977', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:13.796150', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.025214748457074165, 'timestamp': '2025-09-10 02:27:13.797693', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.826536', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.013453769497573376, 'timestamp': '2025-09-10 02:27:13.849910', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.879541', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.03888639435172081, 'timestamp': '2025-09-10 02:27:13.881380', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.910223', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.019313322380185127, 'timestamp': '2025-09-10 02:27:13.912568', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.941397', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.0487765371799469, 'timestamp': '2025-09-10 02:27:13.943313', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:13.972421', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.10099424421787262, 'timestamp': '2025-09-10 02:27:13.995743', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.024605', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.018047673627734184, 'timestamp': '2025-09-10 02:27:14.026385', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.054914', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.02523096464574337, 'timestamp': '2025-09-10 02:27:14.056844', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:14.086109', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.021651145070791245, 'timestamp': '2025-09-10 02:27:14.087975', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.116540', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.03117157705128193, 'timestamp': '2025-09-10 02:27:14.139838', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.168137', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.012788075022399426, 'timestamp': '2025-09-10 02:27:14.169866', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.198437', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.03557702153921127, 'timestamp': '2025-09-10 02:27:14.200196', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.228923', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.015616429038345814, 'timestamp': '2025-09-10 02:27:14.230662', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.258895', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.017235806211829185, 'timestamp': '2025-09-10 02:27:14.282047', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.310957', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.023321127519011497, 'timestamp': '2025-09-10 02:27:14.313119', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:14.341204', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.019345298409461975, 'timestamp': '2025-09-10 02:27:14.343044', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.371907', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.014294320717453957, 'timestamp': '2025-09-10 02:27:14.373753', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.402377', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.017413537949323654, 'timestamp': '2025-09-10 02:27:14.425751', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.454139', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.010392394848167896, 'timestamp': '2025-09-10 02:27:14.455815', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.484133', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.03324350342154503, 'timestamp': '2025-09-10 02:27:14.485777', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:14.514360', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.0070959129370749, 'timestamp': '2025-09-10 02:27:14.516005', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.545060', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.04243098571896553, 'timestamp': '2025-09-10 02:27:14.568407', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.597655', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.025557631626725197, 'timestamp': '2025-09-10 02:27:14.599503', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.628391', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.04200953245162964, 'timestamp': '2025-09-10 02:27:14.630385', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.659004', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.05213457718491554, 'timestamp': '2025-09-10 02:27:14.660982', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.689571', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.023358408361673355, 'timestamp': '2025-09-10 02:27:14.712920', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.741454', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.013042804785072803, 'timestamp': '2025-09-10 02:27:14.743258', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:14.772438', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.035700105130672455, 'timestamp': '2025-09-10 02:27:14.774267', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.803387', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.08563186228275299, 'timestamp': '2025-09-10 02:27:14.805300', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.834341', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.036948930472135544, 'timestamp': '2025-09-10 02:27:14.857451', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.886200', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.05217735096812248, 'timestamp': '2025-09-10 02:27:14.888010', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.916899', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.0390712209045887, 'timestamp': '2025-09-10 02:27:14.918478', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:14.947274', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.058155059814453125, 'timestamp': '2025-09-10 02:27:14.948974', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:14.977620', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.0360332615673542, 'timestamp': '2025-09-10 02:27:15.000831', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:15.029488', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.024571111425757408, 'timestamp': '2025-09-10 02:27:15.031350', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:15.060326', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.055816736072301865, 'timestamp': '2025-09-10 02:27:15.061956', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.090646', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.04789220914244652, 'timestamp': '2025-09-10 02:27:15.092463', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:15.121016', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.0074346489273011684, 'timestamp': '2025-09-10 02:27:15.149730', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.178898', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.015395489521324635, 'timestamp': '2025-09-10 02:27:15.180827', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.209715', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.018152425065636635, 'timestamp': '2025-09-10 02:27:15.211581', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.240425', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.045093268156051636, 'timestamp': '2025-09-10 02:27:15.242189', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.271313', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.029312655329704285, 'timestamp': '2025-09-10 02:27:15.294467', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.322636', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.016746576875448227, 'timestamp': '2025-09-10 02:27:15.324478', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.353145', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.038051072508096695, 'timestamp': '2025-09-10 02:27:15.356001', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.384523', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.02798115275800228, 'timestamp': '2025-09-10 02:27:15.387102', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.415815', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.01591372862458229, 'timestamp': '2025-09-10 02:27:15.438921', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.467468', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.021558169275522232, 'timestamp': '2025-09-10 02:27:15.469338', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.497956', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.01798681728541851, 'timestamp': '2025-09-10 02:27:15.499582', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:15.532023', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.03604728356003761, 'timestamp': '2025-09-10 02:27:15.533881', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:15.562489', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.052804116159677505, 'timestamp': '2025-09-10 02:27:15.585704', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:15.614485', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.06106474995613098, 'timestamp': '2025-09-10 02:27:15.616366', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.645155', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.050376616418361664, 'timestamp': '2025-09-10 02:27:15.646873', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.675519', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.028567716479301453, 'timestamp': '2025-09-10 02:27:15.677583', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.706290', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.017581339925527573, 'timestamp': '2025-09-10 02:27:15.729644', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:15.758894', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.06794560700654984, 'timestamp': '2025-09-10 02:27:15.761800', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.791419', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.023648962378501892, 'timestamp': '2025-09-10 02:27:15.793250', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.823279', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.014147087931632996, 'timestamp': '2025-09-10 02:27:15.824878', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.855706', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.02838747762143612, 'timestamp': '2025-09-10 02:27:15.879147', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.908398', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.024299990385770798, 'timestamp': '2025-09-10 02:27:15.909995', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:15.940091', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.040718890726566315, 'timestamp': '2025-09-10 02:27:15.941956', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:15.970668', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.026956811547279358, 'timestamp': '2025-09-10 02:27:15.972597', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.001502', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.020035987719893456, 'timestamp': '2025-09-10 02:27:16.031771', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.062656', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.020776256918907166, 'timestamp': '2025-09-10 02:27:16.064529', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.093561', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.03961186483502388, 'timestamp': '2025-09-10 02:27:16.095439', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.124253', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.03113470785319805, 'timestamp': '2025-09-10 02:27:16.126083', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.154715', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.05010887607932091, 'timestamp': '2025-09-10 02:27:16.177947', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:16.206794', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.028908152133226395, 'timestamp': '2025-09-10 02:27:16.209886', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.242380', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.043085407465696335, 'timestamp': '2025-09-10 02:27:16.244242', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.273489', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.017086556181311607, 'timestamp': '2025-09-10 02:27:16.275283', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.304018', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.04856061562895775, 'timestamp': '2025-09-10 02:27:16.327420', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.356166', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.03742309287190437, 'timestamp': '2025-09-10 02:27:16.358018', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.386671', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.05607035011053085, 'timestamp': '2025-09-10 02:27:16.388548', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.416932', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.019550606608390808, 'timestamp': '2025-09-10 02:27:16.419157', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.447648', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.02041657827794552, 'timestamp': '2025-09-10 02:27:16.471015', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.499498', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.008098459802567959, 'timestamp': '2025-09-10 02:27:16.501178', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.529696', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.015380357392132282, 'timestamp': '2025-09-10 02:27:16.531550', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.560308', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.02163703925907612, 'timestamp': '2025-09-10 02:27:16.561944', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.590482', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.035727955400943756, 'timestamp': '2025-09-10 02:27:16.613924', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.642988', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.009822634980082512, 'timestamp': '2025-09-10 02:27:16.644561', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.672969', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.005142558831721544, 'timestamp': '2025-09-10 02:27:16.674810', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.703610', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.03525710478425026, 'timestamp': '2025-09-10 02:27:16.705199', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.733783', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.0724150687456131, 'timestamp': '2025-09-10 02:27:16.757345', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.786786', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.06361201405525208, 'timestamp': '2025-09-10 02:27:16.788835', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.817404', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.06277791410684586, 'timestamp': '2025-09-10 02:27:16.819775', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.849183', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.06249118596315384, 'timestamp': '2025-09-10 02:27:16.851034', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.879697', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.023662757128477097, 'timestamp': '2025-09-10 02:27:16.903181', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.932319', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.028213325887918472, 'timestamp': '2025-09-10 02:27:16.934216', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:16.962962', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.04014962911605835, 'timestamp': '2025-09-10 02:27:16.964595', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:16.993083', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.019419433549046516, 'timestamp': '2025-09-10 02:27:16.994968', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:17.023560', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.012511305510997772, 'timestamp': '2025-09-10 02:27:17.046738', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.075291', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.04608547315001488, 'timestamp': '2025-09-10 02:27:17.076989', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.105956', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.0226228516548872, 'timestamp': '2025-09-10 02:27:17.107671', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.136318', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.010072730481624603, 'timestamp': '2025-09-10 02:27:17.138082', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.166945', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.011195765808224678, 'timestamp': '2025-09-10 02:27:17.190368', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.220006', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.015673991292715073, 'timestamp': '2025-09-10 02:27:17.222011', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.250591', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.033464204519987106, 'timestamp': '2025-09-10 02:27:17.252615', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:17.281724', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.022387336939573288, 'timestamp': '2025-09-10 02:27:17.283920', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.312436', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.0325801745057106, 'timestamp': '2025-09-10 02:27:17.335682', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:17.364525', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.06113835796713829, 'timestamp': '2025-09-10 02:27:17.366450', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.395501', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.011920975521206856, 'timestamp': '2025-09-10 02:27:17.397258', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.426245', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.021274421364068985, 'timestamp': '2025-09-10 02:27:17.428356', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.457932', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.04234448820352554, 'timestamp': '2025-09-10 02:27:17.481314', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.510844', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.027071913704276085, 'timestamp': '2025-09-10 02:27:17.512722', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.541342', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.013425322249531746, 'timestamp': '2025-09-10 02:27:17.542903', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.570976', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.007693841587752104, 'timestamp': '2025-09-10 02:27:17.573902', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.606463', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.027679383754730225, 'timestamp': '2025-09-10 02:27:17.629475', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.658064', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.05865621566772461, 'timestamp': '2025-09-10 02:27:17.661861', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:17.690721', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.015808407217264175, 'timestamp': '2025-09-10 02:27:17.695697', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:17.724286', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.05676742270588875, 'timestamp': '2025-09-10 02:27:17.725981', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.754519', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.039605528116226196, 'timestamp': '2025-09-10 02:27:17.777900', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.809571', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.023134754970669746, 'timestamp': '2025-09-10 02:27:17.811603', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.840672', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.019693270325660706, 'timestamp': '2025-09-10 02:27:17.845192', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.874333', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.014443730004131794, 'timestamp': '2025-09-10 02:27:17.877418', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.908136', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.06374738365411758, 'timestamp': '2025-09-10 02:27:17.932824', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.961265', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.06489721685647964, 'timestamp': '2025-09-10 02:27:17.963135', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:17.991928', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.02132502570748329, 'timestamp': '2025-09-10 02:27:17.995007', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.023920', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.045552726835012436, 'timestamp': '2025-09-10 02:27:18.025883', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.072998', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.025895103812217712, 'timestamp': '2025-09-10 02:27:18.096191', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.130616', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.042253535240888596, 'timestamp': '2025-09-10 02:27:18.132789', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.161157', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.0021609091199934483, 'timestamp': '2025-09-10 02:27:18.162981', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.191456', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.051309734582901, 'timestamp': '2025-09-10 02:27:18.193300', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.221795', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.027675259858369827, 'timestamp': '2025-09-10 02:27:18.248515', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.277368', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.031873274594545364, 'timestamp': '2025-09-10 02:27:18.279103', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.308468', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.05416679382324219, 'timestamp': '2025-09-10 02:27:18.310571', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.344175', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.06558862328529358, 'timestamp': '2025-09-10 02:27:18.346231', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.376459', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.03938909247517586, 'timestamp': '2025-09-10 02:27:18.399756', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.429267', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.06032661348581314, 'timestamp': '2025-09-10 02:27:18.431733', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.460381', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.06398675590753555, 'timestamp': '2025-09-10 02:27:18.466894', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.501425', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.009640135802328587, 'timestamp': '2025-09-10 02:27:18.502991', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.531851', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.018345508724451065, 'timestamp': '2025-09-10 02:27:18.554961', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.584382', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.08645867556333542, 'timestamp': '2025-09-10 02:27:18.586281', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.615871', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.022902602329850197, 'timestamp': '2025-09-10 02:27:18.617826', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.647165', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.033925969153642654, 'timestamp': '2025-09-10 02:27:18.648810', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.678337', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.04151107743382454, 'timestamp': '2025-09-10 02:27:18.701792', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.731573', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.04618053510785103, 'timestamp': '2025-09-10 02:27:18.733370', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.762981', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.025517631322145462, 'timestamp': '2025-09-10 02:27:18.764945', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:18.794351', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.007069730665534735, 'timestamp': '2025-09-10 02:27:18.797417', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.828343', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.0422409251332283, 'timestamp': '2025-09-10 02:27:18.851593', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.880443', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.0322219543159008, 'timestamp': '2025-09-10 02:27:18.882417', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.911728', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.03617298603057861, 'timestamp': '2025-09-10 02:27:18.913665', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.942380', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.03871258348226547, 'timestamp': '2025-09-10 02:27:18.944204', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:18.973455', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.025040697306394577, 'timestamp': '2025-09-10 02:27:18.996689', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.025611', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.023504246026277542, 'timestamp': '2025-09-10 02:27:19.027880', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.056355', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.05998089909553528, 'timestamp': '2025-09-10 02:27:19.058082', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.086744', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.030563218519091606, 'timestamp': '2025-09-10 02:27:19.088884', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:19.117472', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.03772884979844093, 'timestamp': '2025-09-10 02:27:19.140764', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:27:21.037220', 'step': 1216, 'epoch': 1} {'type': 'pplx', 'content': 2289026.425039921, 'timestamp': '2025-09-10 02:27:21.039105', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.066171', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.026032647117972374, 'timestamp': '2025-09-10 02:27:21.068040', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.097122', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.04040813446044922, 'timestamp': '2025-09-10 02:27:21.099051', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.127652', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.02689814381301403, 'timestamp': '2025-09-10 02:27:21.129663', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.158619', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.02480989508330822, 'timestamp': '2025-09-10 02:27:21.182128', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.210985', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.03290778771042824, 'timestamp': '2025-09-10 02:27:21.212803', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.241736', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.056918807327747345, 'timestamp': '2025-09-10 02:27:21.243313', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.271979', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.035863909870386124, 'timestamp': '2025-09-10 02:27:21.273869', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:21.302495', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.026448115706443787, 'timestamp': '2025-09-10 02:27:21.325703', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.354467', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.0297726821154356, 'timestamp': '2025-09-10 02:27:21.356185', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.385415', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.03371063619852066, 'timestamp': '2025-09-10 02:27:21.387182', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.415395', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.0271589495241642, 'timestamp': '2025-09-10 02:27:21.417243', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.445695', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.03135223314166069, 'timestamp': '2025-09-10 02:27:21.468981', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.497808', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.0368199348449707, 'timestamp': '2025-09-10 02:27:21.499522', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.528536', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.025787577033042908, 'timestamp': '2025-09-10 02:27:21.530851', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.559139', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.04452862963080406, 'timestamp': '2025-09-10 02:27:21.560909', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.589342', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.04641091451048851, 'timestamp': '2025-09-10 02:27:21.612489', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.641342', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.06339367479085922, 'timestamp': '2025-09-10 02:27:21.643099', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.671645', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.012783932499587536, 'timestamp': '2025-09-10 02:27:21.673567', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.702110', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.005669086240231991, 'timestamp': '2025-09-10 02:27:21.703830', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.732556', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.02278485894203186, 'timestamp': '2025-09-10 02:27:21.755592', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:21.784522', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.029380736872553825, 'timestamp': '2025-09-10 02:27:21.786524', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.815346', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.05142504349350929, 'timestamp': '2025-09-10 02:27:21.817031', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:21.845793', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.039533525705337524, 'timestamp': '2025-09-10 02:27:21.847726', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.876616', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.03197439759969711, 'timestamp': '2025-09-10 02:27:21.899708', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.928794', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.05023592710494995, 'timestamp': '2025-09-10 02:27:21.930686', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.960377', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.04292667284607887, 'timestamp': '2025-09-10 02:27:21.962103', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:21.990690', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.059419870376586914, 'timestamp': '2025-09-10 02:27:21.992499', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.021517', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.03515687957406044, 'timestamp': '2025-09-10 02:27:22.044653', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.073481', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.04057113081216812, 'timestamp': '2025-09-10 02:27:22.075201', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.103971', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.057592350989580154, 'timestamp': '2025-09-10 02:27:22.106006', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:22.135112', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.04547825828194618, 'timestamp': '2025-09-10 02:27:22.136826', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:22.165294', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.03685108572244644, 'timestamp': '2025-09-10 02:27:22.188846', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.217662', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.046049814671278, 'timestamp': '2025-09-10 02:27:22.219523', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.248592', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.03411615639925003, 'timestamp': '2025-09-10 02:27:22.250463', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.279405', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.036534737795591354, 'timestamp': '2025-09-10 02:27:22.281283', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.309914', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.060973696410655975, 'timestamp': '2025-09-10 02:27:22.333292', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.361797', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.029213469475507736, 'timestamp': '2025-09-10 02:27:22.363470', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.392225', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.055876441299915314, 'timestamp': '2025-09-10 02:27:22.393797', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.422080', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.035937659442424774, 'timestamp': '2025-09-10 02:27:22.423958', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.452559', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.06300339847803116, 'timestamp': '2025-09-10 02:27:22.475653', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.504634', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.06337703764438629, 'timestamp': '2025-09-10 02:27:22.506290', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.535092', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.02344011329114437, 'timestamp': '2025-09-10 02:27:22.536912', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.565690', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.02030993066728115, 'timestamp': '2025-09-10 02:27:22.567405', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.596200', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.07915498316287994, 'timestamp': '2025-09-10 02:27:22.619572', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.648198', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.03038150444626808, 'timestamp': '2025-09-10 02:27:22.649817', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.678375', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.02446349337697029, 'timestamp': '2025-09-10 02:27:22.680206', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.708899', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.03944491222500801, 'timestamp': '2025-09-10 02:27:22.710716', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.739464', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.04451688751578331, 'timestamp': '2025-09-10 02:27:22.762678', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.791541', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.023774994537234306, 'timestamp': '2025-09-10 02:27:22.793290', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.822023', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.06238982081413269, 'timestamp': '2025-09-10 02:27:22.823837', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:22.852597', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.01891942135989666, 'timestamp': '2025-09-10 02:27:22.854419', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.883347', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.016256926581263542, 'timestamp': '2025-09-10 02:27:22.906695', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.935439', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.020205358043313026, 'timestamp': '2025-09-10 02:27:22.937064', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:22.966635', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.04541391506791115, 'timestamp': '2025-09-10 02:27:22.972446', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.011426', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.02258029393851757, 'timestamp': '2025-09-10 02:27:23.013224', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.042267', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.0459962822496891, 'timestamp': '2025-09-10 02:27:23.065373', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.098320', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.04839509725570679, 'timestamp': '2025-09-10 02:27:23.100118', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.128600', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.022308675572276115, 'timestamp': '2025-09-10 02:27:23.136317', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.168579', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.008223294280469418, 'timestamp': '2025-09-10 02:27:23.170457', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.199084', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.027793053537607193, 'timestamp': '2025-09-10 02:27:23.222516', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.253899', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.01381795946508646, 'timestamp': '2025-09-10 02:27:23.255913', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.284641', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.06890726834535599, 'timestamp': '2025-09-10 02:27:23.286567', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.315360', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.03059798665344715, 'timestamp': '2025-09-10 02:27:23.316893', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.345475', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.044452693313360214, 'timestamp': '2025-09-10 02:27:23.368696', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:23.401098', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.017186051234602928, 'timestamp': '2025-09-10 02:27:23.403073', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.436685', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.0315559059381485, 'timestamp': '2025-09-10 02:27:23.440055', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.474771', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.036582719534635544, 'timestamp': '2025-09-10 02:27:23.476604', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:23.504947', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.03221450373530388, 'timestamp': '2025-09-10 02:27:23.528083', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.558025', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.033226583153009415, 'timestamp': '2025-09-10 02:27:23.562953', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.592128', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.04618069529533386, 'timestamp': '2025-09-10 02:27:23.594036', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.624690', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.01848338171839714, 'timestamp': '2025-09-10 02:27:23.626551', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.655198', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.0044882288202643394, 'timestamp': '2025-09-10 02:27:23.678347', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.707114', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.04217400401830673, 'timestamp': '2025-09-10 02:27:23.708792', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:23.741209', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.0670870840549469, 'timestamp': '2025-09-10 02:27:23.743259', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.772245', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.08485446870326996, 'timestamp': '2025-09-10 02:27:23.774114', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.803060', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.07073134928941727, 'timestamp': '2025-09-10 02:27:23.826640', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.855312', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.02002301998436451, 'timestamp': '2025-09-10 02:27:23.857111', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:23.885696', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.06766389310359955, 'timestamp': '2025-09-10 02:27:23.887439', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.916028', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.03803431987762451, 'timestamp': '2025-09-10 02:27:23.917881', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:23.946494', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.03328724578022957, 'timestamp': '2025-09-10 02:27:23.969781', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:23.998826', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.021175900474190712, 'timestamp': '2025-09-10 02:27:24.000458', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.029385', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.011340412311255932, 'timestamp': '2025-09-10 02:27:24.031309', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.061093', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.027038246393203735, 'timestamp': '2025-09-10 02:27:24.062963', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.091782', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.023592408746480942, 'timestamp': '2025-09-10 02:27:24.114992', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.144301', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.024524888023734093, 'timestamp': '2025-09-10 02:27:24.145908', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.174488', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.03530529886484146, 'timestamp': '2025-09-10 02:27:24.176350', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.205257', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.013897881843149662, 'timestamp': '2025-09-10 02:27:24.206877', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.235354', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.011112040840089321, 'timestamp': '2025-09-10 02:27:24.258498', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.287332', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.015398895367980003, 'timestamp': '2025-09-10 02:27:24.289133', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.318088', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.0026412815786898136, 'timestamp': '2025-09-10 02:27:24.319913', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.348736', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.02359415777027607, 'timestamp': '2025-09-10 02:27:24.350653', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.379425', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.023567870259284973, 'timestamp': '2025-09-10 02:27:24.402747', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.431650', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.05291704088449478, 'timestamp': '2025-09-10 02:27:24.433423', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.461934', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.05972858890891075, 'timestamp': '2025-09-10 02:27:24.463571', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.492130', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.019543515518307686, 'timestamp': '2025-09-10 02:27:24.493856', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.522812', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.012841133400797844, 'timestamp': '2025-09-10 02:27:24.546268', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.574965', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.012684354558587074, 'timestamp': '2025-09-10 02:27:24.576889', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.605507', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.06203369051218033, 'timestamp': '2025-09-10 02:27:24.607346', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.636018', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.03298981487751007, 'timestamp': '2025-09-10 02:27:24.637697', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.665789', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.07147373259067535, 'timestamp': '2025-09-10 02:27:24.689182', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.718102', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.0012829465558752418, 'timestamp': '2025-09-10 02:27:24.720033', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.748278', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.06342272460460663, 'timestamp': '2025-09-10 02:27:24.750024', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:24.778675', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.001892492757178843, 'timestamp': '2025-09-10 02:27:24.780408', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.809008', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.07816839218139648, 'timestamp': '2025-09-10 02:27:24.832332', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.861108', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.0809142217040062, 'timestamp': '2025-09-10 02:27:24.862562', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.890969', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.02756485342979431, 'timestamp': '2025-09-10 02:27:24.892824', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:24.921260', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.05552193894982338, 'timestamp': '2025-09-10 02:27:24.922862', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:24.952011', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.05214642733335495, 'timestamp': '2025-09-10 02:27:24.975203', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.004458', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.017305118963122368, 'timestamp': '2025-09-10 02:27:25.006338', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.035966', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.037766702473163605, 'timestamp': '2025-09-10 02:27:25.037621', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.066325', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.03526047244668007, 'timestamp': '2025-09-10 02:27:25.068231', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:25.098267', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.03304123878479004, 'timestamp': '2025-09-10 02:27:25.121609', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.151203', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.036667678505182266, 'timestamp': '2025-09-10 02:27:25.152843', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.181525', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.028064250946044922, 'timestamp': '2025-09-10 02:27:25.183440', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.211924', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.05444202199578285, 'timestamp': '2025-09-10 02:27:25.213490', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.242331', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.04092909023165703, 'timestamp': '2025-09-10 02:27:25.265266', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.295180', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.013612611219286919, 'timestamp': '2025-09-10 02:27:25.297057', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:25.327124', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.034610893577337265, 'timestamp': '2025-09-10 02:27:25.328787', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.357837', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.054943304508924484, 'timestamp': '2025-09-10 02:27:25.359688', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.389019', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.029924849048256874, 'timestamp': '2025-09-10 02:27:25.412329', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.441649', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.04340161755681038, 'timestamp': '2025-09-10 02:27:25.443305', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:25.471960', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.060874827206134796, 'timestamp': '2025-09-10 02:27:25.474073', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.502774', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.03564821928739548, 'timestamp': '2025-09-10 02:27:25.504949', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:25.534082', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.06171872466802597, 'timestamp': '2025-09-10 02:27:25.557291', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.586922', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.04816806688904762, 'timestamp': '2025-09-10 02:27:25.589893', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.618100', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.02508343569934368, 'timestamp': '2025-09-10 02:27:25.619879', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.648973', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.03181108459830284, 'timestamp': '2025-09-10 02:27:25.650392', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:25.679385', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.03315838426351547, 'timestamp': '2025-09-10 02:27:25.702800', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.731711', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.014892424456775188, 'timestamp': '2025-09-10 02:27:25.733270', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.761950', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.05184555426239967, 'timestamp': '2025-09-10 02:27:25.763554', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.792001', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.029433416202664375, 'timestamp': '2025-09-10 02:27:25.793680', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.822146', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.038075923919677734, 'timestamp': '2025-09-10 02:27:25.845383', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.874069', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.024071261286735535, 'timestamp': '2025-09-10 02:27:25.875796', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:25.904319', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.011127389967441559, 'timestamp': '2025-09-10 02:27:25.906209', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.934896', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.04546486213803291, 'timestamp': '2025-09-10 02:27:25.936795', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:25.964997', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.031817514449357986, 'timestamp': '2025-09-10 02:27:25.988359', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:26.017349', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.04650825262069702, 'timestamp': '2025-09-10 02:27:26.019266', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.048007', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.061909276992082596, 'timestamp': '2025-09-10 02:27:26.049835', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.078465', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.0383191742002964, 'timestamp': '2025-09-10 02:27:26.080320', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.109443', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.03044038824737072, 'timestamp': '2025-09-10 02:27:26.132773', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:26.161530', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.03768446296453476, 'timestamp': '2025-09-10 02:27:26.163376', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.191854', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.059272684156894684, 'timestamp': '2025-09-10 02:27:26.193841', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.222364', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.03366556391119957, 'timestamp': '2025-09-10 02:27:26.224210', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.253159', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.016975143924355507, 'timestamp': '2025-09-10 02:27:26.276192', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.305011', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.043722160160541534, 'timestamp': '2025-09-10 02:27:26.306625', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:26.335283', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.04664004221558571, 'timestamp': '2025-09-10 02:27:26.337083', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:26.365836', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.04614395648241043, 'timestamp': '2025-09-10 02:27:26.367448', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:26.396613', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.019763609394431114, 'timestamp': '2025-09-10 02:27:26.420097', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.449219', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.03162803500890732, 'timestamp': '2025-09-10 02:27:26.451064', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.480075', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.030755044892430305, 'timestamp': '2025-09-10 02:27:26.481471', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:26.510063', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.03401505574584007, 'timestamp': '2025-09-10 02:27:26.511882', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:26.541269', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.041028063744306564, 'timestamp': '2025-09-10 02:27:26.564400', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:27:28.441898', 'step': 1368, 'epoch': 1} {'type': 'pplx', 'content': 2434394.08545723, 'timestamp': '2025-09-10 02:27:28.443674', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.470905', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.05904477462172508, 'timestamp': '2025-09-10 02:27:28.472700', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.501163', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.04174638167023659, 'timestamp': '2025-09-10 02:27:28.502976', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:28.531659', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.022512640804052353, 'timestamp': '2025-09-10 02:27:28.533441', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.565795', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.045179132372140884, 'timestamp': '2025-09-10 02:27:28.589340', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.618289', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.048660069704055786, 'timestamp': '2025-09-10 02:27:28.620142', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.648768', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.03543194383382797, 'timestamp': '2025-09-10 02:27:28.650597', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.679648', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.02141200564801693, 'timestamp': '2025-09-10 02:27:28.683184', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.716161', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.03578066825866699, 'timestamp': '2025-09-10 02:27:28.739558', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.768689', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.04398437216877937, 'timestamp': '2025-09-10 02:27:28.770902', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.799421', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.05702298879623413, 'timestamp': '2025-09-10 02:27:28.801203', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.841154', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.039825230836868286, 'timestamp': '2025-09-10 02:27:28.842965', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:28.872672', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.07406977564096451, 'timestamp': '2025-09-10 02:27:28.895862', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.924385', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.05747872591018677, 'timestamp': '2025-09-10 02:27:28.931011', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.959586', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.05309832841157913, 'timestamp': '2025-09-10 02:27:28.961376', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:28.990092', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.03823574632406235, 'timestamp': '2025-09-10 02:27:28.991800', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.020402', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.02102424018085003, 'timestamp': '2025-09-10 02:27:29.043641', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.072209', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.01896662451326847, 'timestamp': '2025-09-10 02:27:29.074094', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:29.102653', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.020455926656723022, 'timestamp': '2025-09-10 02:27:29.104239', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.133020', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.012913455255329609, 'timestamp': '2025-09-10 02:27:29.134826', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.163800', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.038935162127017975, 'timestamp': '2025-09-10 02:27:29.187067', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.215552', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.026753125712275505, 'timestamp': '2025-09-10 02:27:29.217490', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.246765', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.036826133728027344, 'timestamp': '2025-09-10 02:27:29.248536', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.277180', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.03888767585158348, 'timestamp': '2025-09-10 02:27:29.278959', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.307722', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.024767255410552025, 'timestamp': '2025-09-10 02:27:29.330817', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.359796', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.029736343771219254, 'timestamp': '2025-09-10 02:27:29.361497', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.389866', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.011788521893322468, 'timestamp': '2025-09-10 02:27:29.391779', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.420713', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.01132082287222147, 'timestamp': '2025-09-10 02:27:29.422430', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.450980', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.02078998275101185, 'timestamp': '2025-09-10 02:27:29.474254', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.502649', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.008231586776673794, 'timestamp': '2025-09-10 02:27:29.504515', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.533131', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.019323036074638367, 'timestamp': '2025-09-10 02:27:29.534822', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.563520', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.029249152168631554, 'timestamp': '2025-09-10 02:27:29.565256', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.594018', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.009170596487820148, 'timestamp': '2025-09-10 02:27:29.617294', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.646067', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.029752206057310104, 'timestamp': '2025-09-10 02:27:29.647661', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.676226', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.0362543947994709, 'timestamp': '2025-09-10 02:27:29.678085', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.706690', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.025247853249311447, 'timestamp': '2025-09-10 02:27:29.708628', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.737140', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.027348194271326065, 'timestamp': '2025-09-10 02:27:29.760403', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.788925', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.06653179228305817, 'timestamp': '2025-09-10 02:27:29.790587', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.819154', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.016154978424310684, 'timestamp': '2025-09-10 02:27:29.820770', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.849061', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.05642243102192879, 'timestamp': '2025-09-10 02:27:29.850292', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.878976', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.019452065229415894, 'timestamp': '2025-09-10 02:27:29.902168', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:29.930913', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.07361283153295517, 'timestamp': '2025-09-10 02:27:29.932456', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.961652', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.00370202399790287, 'timestamp': '2025-09-10 02:27:29.963206', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:29.992063', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.06461332738399506, 'timestamp': '2025-09-10 02:27:29.993564', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.022127', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.03506890684366226, 'timestamp': '2025-09-10 02:27:30.045216', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.075040', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.026593204587697983, 'timestamp': '2025-09-10 02:27:30.076280', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.104607', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.05700697749853134, 'timestamp': '2025-09-10 02:27:30.106264', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.134859', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.0367908738553524, 'timestamp': '2025-09-10 02:27:30.136335', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.164862', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.02905452810227871, 'timestamp': '2025-09-10 02:27:30.188075', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.216592', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.01928318664431572, 'timestamp': '2025-09-10 02:27:30.218317', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.246935', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.012731648981571198, 'timestamp': '2025-09-10 02:27:30.248564', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.276602', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.018270840868353844, 'timestamp': '2025-09-10 02:27:30.278265', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.306910', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.009613311849534512, 'timestamp': '2025-09-10 02:27:30.329917', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.358706', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.020077558234333992, 'timestamp': '2025-09-10 02:27:30.360179', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.388704', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.023439887911081314, 'timestamp': '2025-09-10 02:27:30.390178', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:30.418772', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.059081826359033585, 'timestamp': '2025-09-10 02:27:30.420397', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.449116', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.02458806149661541, 'timestamp': '2025-09-10 02:27:30.472364', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.501203', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.005730960983783007, 'timestamp': '2025-09-10 02:27:30.502852', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.531681', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.06985829770565033, 'timestamp': '2025-09-10 02:27:30.533405', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.562738', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.061879415065050125, 'timestamp': '2025-09-10 02:27:30.564218', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.593331', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.03619138523936272, 'timestamp': '2025-09-10 02:27:30.616559', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.645213', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.07978753000497818, 'timestamp': '2025-09-10 02:27:30.646679', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:30.676025', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.06555866450071335, 'timestamp': '2025-09-10 02:27:30.677684', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.706434', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.004598245490342379, 'timestamp': '2025-09-10 02:27:30.708157', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.736985', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.00360993854701519, 'timestamp': '2025-09-10 02:27:30.760311', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.788480', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.06020939350128174, 'timestamp': '2025-09-10 02:27:30.790148', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.818669', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.04477548226714134, 'timestamp': '2025-09-10 02:27:30.820102', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.859687', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.025800930336117744, 'timestamp': '2025-09-10 02:27:30.862378', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.892915', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.053567446768283844, 'timestamp': '2025-09-10 02:27:30.915782', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.945167', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.013317200355231762, 'timestamp': '2025-09-10 02:27:30.946822', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:30.983818', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.0013515051687136292, 'timestamp': '2025-09-10 02:27:30.985438', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.034200', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.01699732430279255, 'timestamp': '2025-09-10 02:27:31.035638', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.064112', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.03770739212632179, 'timestamp': '2025-09-10 02:27:31.087059', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.115884', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.009430590085685253, 'timestamp': '2025-09-10 02:27:31.117590', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.146719', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.05910153314471245, 'timestamp': '2025-09-10 02:27:31.148300', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.176814', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.05851801484823227, 'timestamp': '2025-09-10 02:27:31.178479', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.207119', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.0213641170412302, 'timestamp': '2025-09-10 02:27:31.230390', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.259367', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.09502411633729935, 'timestamp': '2025-09-10 02:27:31.261116', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.289959', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.015186154283583164, 'timestamp': '2025-09-10 02:27:31.291995', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:31.320928', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.028979049995541573, 'timestamp': '2025-09-10 02:27:31.322661', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.351078', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.04702762886881828, 'timestamp': '2025-09-10 02:27:31.374147', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.403523', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.05766434594988823, 'timestamp': '2025-09-10 02:27:31.405068', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.433210', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.019389281049370766, 'timestamp': '2025-09-10 02:27:31.434897', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.463616', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.02509896643459797, 'timestamp': '2025-09-10 02:27:31.465021', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.493114', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.003370345802977681, 'timestamp': '2025-09-10 02:27:31.516332', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.545122', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.007237529847770929, 'timestamp': '2025-09-10 02:27:31.546968', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.576560', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.02067815326154232, 'timestamp': '2025-09-10 02:27:31.578129', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.606490', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.025009362027049065, 'timestamp': '2025-09-10 02:27:31.608123', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:31.637240', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.030619319528341293, 'timestamp': '2025-09-10 02:27:31.660033', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.688593', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.043088074773550034, 'timestamp': '2025-09-10 02:27:31.689980', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.718964', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.040807925164699554, 'timestamp': '2025-09-10 02:27:31.720392', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.749288', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.03335711732506752, 'timestamp': '2025-09-10 02:27:31.750862', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.779152', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.026910170912742615, 'timestamp': '2025-09-10 02:27:31.802243', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:31.830794', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.016020607203245163, 'timestamp': '2025-09-10 02:27:31.832565', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.860702', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.05180508643388748, 'timestamp': '2025-09-10 02:27:31.862204', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.890408', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.02024121955037117, 'timestamp': '2025-09-10 02:27:31.892053', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.920488', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.011840170249342918, 'timestamp': '2025-09-10 02:27:31.943615', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:31.971985', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.012870186008512974, 'timestamp': '2025-09-10 02:27:31.973445', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:32.002753', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.07110538333654404, 'timestamp': '2025-09-10 02:27:32.004078', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.032171', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.032635726034641266, 'timestamp': '2025-09-10 02:27:32.034102', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.062702', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.014046980999410152, 'timestamp': '2025-09-10 02:27:32.085805', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.114594', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.013907646760344505, 'timestamp': '2025-09-10 02:27:32.116359', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.145027', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.021792763844132423, 'timestamp': '2025-09-10 02:27:32.146898', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.176262', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.0276406891644001, 'timestamp': '2025-09-10 02:27:32.177950', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:32.206718', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.0620223693549633, 'timestamp': '2025-09-10 02:27:32.229693', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.258221', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.030954156070947647, 'timestamp': '2025-09-10 02:27:32.259806', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.288366', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.029085157439112663, 'timestamp': '2025-09-10 02:27:32.291308', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.322048', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.04033757373690605, 'timestamp': '2025-09-10 02:27:32.323892', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:32.352760', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.010873624123632908, 'timestamp': '2025-09-10 02:27:32.375762', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.405040', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.013691750355064869, 'timestamp': '2025-09-10 02:27:32.406520', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:32.435024', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.03100155107676983, 'timestamp': '2025-09-10 02:27:32.436679', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.465034', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.04212302714586258, 'timestamp': '2025-09-10 02:27:32.466551', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.495069', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.030898677185177803, 'timestamp': '2025-09-10 02:27:32.518256', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.547040', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.023815101012587547, 'timestamp': '2025-09-10 02:27:32.548369', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.576975', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.014275996014475822, 'timestamp': '2025-09-10 02:27:32.578699', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.606997', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.009228719398379326, 'timestamp': '2025-09-10 02:27:32.608707', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:32.636795', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.03572138771414757, 'timestamp': '2025-09-10 02:27:32.659766', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.688203', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.04589131101965904, 'timestamp': '2025-09-10 02:27:32.689613', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.717728', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.02454020082950592, 'timestamp': '2025-09-10 02:27:32.719125', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.747188', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.01638377271592617, 'timestamp': '2025-09-10 02:27:32.748874', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:32.777924', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.019874457269906998, 'timestamp': '2025-09-10 02:27:32.801074', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.832662', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.037941742688417435, 'timestamp': '2025-09-10 02:27:32.834348', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.865054', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.04492131620645523, 'timestamp': '2025-09-10 02:27:32.866852', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.895175', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.07406710088253021, 'timestamp': '2025-09-10 02:27:32.896565', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:32.925001', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.038812749087810516, 'timestamp': '2025-09-10 02:27:32.948254', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:32.976640', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.023289749398827553, 'timestamp': '2025-09-10 02:27:32.978144', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:33.006669', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.022751254960894585, 'timestamp': '2025-09-10 02:27:33.008143', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:33.036653', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.02474578656256199, 'timestamp': '2025-09-10 02:27:33.038144', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:33.067191', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.027360495179891586, 'timestamp': '2025-09-10 02:27:33.090160', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:33.119219', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.028771663084626198, 'timestamp': '2025-09-10 02:27:33.120486', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:33.149008', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.03129454329609871, 'timestamp': '2025-09-10 02:27:33.150332', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:33.178918', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.032071299850940704, 'timestamp': '2025-09-10 02:27:33.180420', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:33.208968', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.01595943421125412, 'timestamp': '2025-09-10 02:27:33.231876', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-10 02:27:37.678201', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.714896', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.031216096132993698, 'timestamp': '2025-09-10 02:27:37.716786', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.746667', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.03223239257931709, 'timestamp': '2025-09-10 02:27:37.748366', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.776981', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.016054486855864525, 'timestamp': '2025-09-10 02:27:37.778737', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.807761', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.04805648326873779, 'timestamp': '2025-09-10 02:27:37.831105', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.860972', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.033813804388046265, 'timestamp': '2025-09-10 02:27:37.862379', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.891485', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.02551659755408764, 'timestamp': '2025-09-10 02:27:37.893157', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.923206', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.07560396194458008, 'timestamp': '2025-09-10 02:27:37.924923', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:37.953608', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.016034072265028954, 'timestamp': '2025-09-10 02:27:37.977075', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.005684', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.054234106093645096, 'timestamp': '2025-09-10 02:27:38.007523', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.037083', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.055994562804698944, 'timestamp': '2025-09-10 02:27:38.038696', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:38.067343', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.05683322623372078, 'timestamp': '2025-09-10 02:27:38.069835', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.098901', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.044902119785547256, 'timestamp': '2025-09-10 02:27:38.122281', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.151339', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.049421072006225586, 'timestamp': '2025-09-10 02:27:38.152944', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.181620', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.02994488552212715, 'timestamp': '2025-09-10 02:27:38.183424', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.212678', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.024709923192858696, 'timestamp': '2025-09-10 02:27:38.214372', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.243432', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.006796296685934067, 'timestamp': '2025-09-10 02:27:38.267015', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.295646', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.01112403254956007, 'timestamp': '2025-09-10 02:27:38.297418', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.326801', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.018252085894346237, 'timestamp': '2025-09-10 02:27:38.328406', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.356887', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.01778973825275898, 'timestamp': '2025-09-10 02:27:38.358688', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:38.387741', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.03736981377005577, 'timestamp': '2025-09-10 02:27:38.410900', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:27:40.281408', 'step': 1520, 'epoch': 1} {'type': 'pplx', 'content': 2612814.9375976576, 'timestamp': '2025-09-10 02:27:40.282825', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.311317', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.028275569900870323, 'timestamp': '2025-09-10 02:27:40.313021', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.343548', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.020396174862980843, 'timestamp': '2025-09-10 02:27:40.345347', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.376004', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.027503030374646187, 'timestamp': '2025-09-10 02:27:40.377518', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.407281', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.03672170639038086, 'timestamp': '2025-09-10 02:27:40.430620', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.460546', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.02398330345749855, 'timestamp': '2025-09-10 02:27:40.462249', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.491664', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.07760308682918549, 'timestamp': '2025-09-10 02:27:40.493790', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.523312', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.026426970958709717, 'timestamp': '2025-09-10 02:27:40.525266', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.554605', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.08535528182983398, 'timestamp': '2025-09-10 02:27:40.578519', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:40.607561', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.03914172574877739, 'timestamp': '2025-09-10 02:27:40.609195', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.638632', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.009633233770728111, 'timestamp': '2025-09-10 02:27:40.640508', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.669737', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.0483718141913414, 'timestamp': '2025-09-10 02:27:40.671540', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:40.700091', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.049617115408182144, 'timestamp': '2025-09-10 02:27:40.723592', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.753058', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.018541039898991585, 'timestamp': '2025-09-10 02:27:40.754888', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.784533', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.03365081921219826, 'timestamp': '2025-09-10 02:27:40.786130', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.814712', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.042272280901670456, 'timestamp': '2025-09-10 02:27:40.816551', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.845211', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.03286171704530716, 'timestamp': '2025-09-10 02:27:40.868559', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.897786', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.027605948969721794, 'timestamp': '2025-09-10 02:27:40.899115', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.927427', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.01908447965979576, 'timestamp': '2025-09-10 02:27:40.929138', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:40.957733', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.053460389375686646, 'timestamp': '2025-09-10 02:27:40.958928', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:40.988080', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.04275515303015709, 'timestamp': '2025-09-10 02:27:41.011207', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.040564', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.046914078295230865, 'timestamp': '2025-09-10 02:27:41.042382', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.071814', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.02406330220401287, 'timestamp': '2025-09-10 02:27:41.073356', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.101928', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.004797068890184164, 'timestamp': '2025-09-10 02:27:41.103390', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.132295', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.032057516276836395, 'timestamp': '2025-09-10 02:27:41.155503', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.184572', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.03724870830774307, 'timestamp': '2025-09-10 02:27:41.186365', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.215090', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.03298107162117958, 'timestamp': '2025-09-10 02:27:41.217082', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.246546', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.05260986462235451, 'timestamp': '2025-09-10 02:27:41.248449', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.277553', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.01699446514248848, 'timestamp': '2025-09-10 02:27:41.301026', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.330790', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.031801871955394745, 'timestamp': '2025-09-10 02:27:41.332663', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:41.361835', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.04717520251870155, 'timestamp': '2025-09-10 02:27:41.363819', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.392884', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.018794167786836624, 'timestamp': '2025-09-10 02:27:41.394972', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.424031', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.029533835127949715, 'timestamp': '2025-09-10 02:27:41.447423', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.476409', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.03371378034353256, 'timestamp': '2025-09-10 02:27:41.478115', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.509111', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.02296869456768036, 'timestamp': '2025-09-10 02:27:41.510958', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.540351', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.05213111639022827, 'timestamp': '2025-09-10 02:27:41.542372', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.571477', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.05112554877996445, 'timestamp': '2025-09-10 02:27:41.595042', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:41.623777', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.03932832553982735, 'timestamp': '2025-09-10 02:27:41.625655', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.654816', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.029580960050225258, 'timestamp': '2025-09-10 02:27:41.656523', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.687598', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.03373652696609497, 'timestamp': '2025-09-10 02:27:41.688977', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.717909', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.022883696481585503, 'timestamp': '2025-09-10 02:27:41.741038', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:41.776334', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.015995008870959282, 'timestamp': '2025-09-10 02:27:41.778088', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.820795', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.020030509680509567, 'timestamp': '2025-09-10 02:27:41.823969', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.865922', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.02815486304461956, 'timestamp': '2025-09-10 02:27:41.867372', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:27:41.896316', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.04295634850859642, 'timestamp': '2025-09-10 02:27:41.919726', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:41.949200', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.04763511195778847, 'timestamp': '2025-09-10 02:27:41.951126', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:41.981311', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.030427174642682076, 'timestamp': '2025-09-10 02:27:41.982910', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.011390', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.016630670055747032, 'timestamp': '2025-09-10 02:27:42.013229', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.046782', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.0399869866669178, 'timestamp': '2025-09-10 02:27:42.072887', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.102813', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.04899362847208977, 'timestamp': '2025-09-10 02:27:42.104331', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.134548', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.018443822860717773, 'timestamp': '2025-09-10 02:27:42.136639', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.165712', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.038207512348890305, 'timestamp': '2025-09-10 02:27:42.167702', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.196489', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.0143889794126153, 'timestamp': '2025-09-10 02:27:42.219652', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.248928', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.0734647661447525, 'timestamp': '2025-09-10 02:27:42.250935', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.283104', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.022592630237340927, 'timestamp': '2025-09-10 02:27:42.285116', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.323233', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.0480295792222023, 'timestamp': '2025-09-10 02:27:42.324792', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.352974', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.0173494815826416, 'timestamp': '2025-09-10 02:27:42.376119', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.404432', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.04874527081847191, 'timestamp': '2025-09-10 02:27:42.405747', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.434276', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.027809513732790947, 'timestamp': '2025-09-10 02:27:42.435700', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.463979', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.033332522958517075, 'timestamp': '2025-09-10 02:27:42.465523', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.494495', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.050775084644556046, 'timestamp': '2025-09-10 02:27:42.517496', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.545903', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.03698405623435974, 'timestamp': '2025-09-10 02:27:42.547403', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.576528', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.05788445845246315, 'timestamp': '2025-09-10 02:27:42.578052', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.606079', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.009278425946831703, 'timestamp': '2025-09-10 02:27:42.607818', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.636501', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.05599082633852959, 'timestamp': '2025-09-10 02:27:42.659836', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.689019', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.03505483642220497, 'timestamp': '2025-09-10 02:27:42.690790', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.719347', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.048709262162446976, 'timestamp': '2025-09-10 02:27:42.720747', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.749335', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.06378418952226639, 'timestamp': '2025-09-10 02:27:42.750876', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:42.779259', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.02308001182973385, 'timestamp': '2025-09-10 02:27:42.802379', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.830568', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.036102551966905594, 'timestamp': '2025-09-10 02:27:42.832346', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.861945', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.03362468630075455, 'timestamp': '2025-09-10 02:27:42.863472', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.891559', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.04461940377950668, 'timestamp': '2025-09-10 02:27:42.892977', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.921518', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.04401617869734764, 'timestamp': '2025-09-10 02:27:42.944417', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:42.974190', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.03486516699194908, 'timestamp': '2025-09-10 02:27:42.975532', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.005314', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.030225077643990517, 'timestamp': '2025-09-10 02:27:43.007081', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:43.036700', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.04530999809503555, 'timestamp': '2025-09-10 02:27:43.038232', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.067360', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.03315466269850731, 'timestamp': '2025-09-10 02:27:43.090271', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.118612', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.03433266654610634, 'timestamp': '2025-09-10 02:27:43.120222', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.148786', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.017902569845318794, 'timestamp': '2025-09-10 02:27:43.150302', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:43.178863', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.032323576509952545, 'timestamp': '2025-09-10 02:27:43.180600', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.208792', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.01567261666059494, 'timestamp': '2025-09-10 02:27:43.231977', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:43.261132', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.03300542011857033, 'timestamp': '2025-09-10 02:27:43.262600', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.290807', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.03128361329436302, 'timestamp': '2025-09-10 02:27:43.292259', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.320991', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.021510543301701546, 'timestamp': '2025-09-10 02:27:43.322612', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.350895', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.05365094542503357, 'timestamp': '2025-09-10 02:27:43.374043', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.403726', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.03761393204331398, 'timestamp': '2025-09-10 02:27:43.405308', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.433961', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.00948913861066103, 'timestamp': '2025-09-10 02:27:43.435461', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.463843', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.047155749052762985, 'timestamp': '2025-09-10 02:27:43.465336', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.494372', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.0055794003419578075, 'timestamp': '2025-09-10 02:27:43.517506', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.545822', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.021954579278826714, 'timestamp': '2025-09-10 02:27:43.547425', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:43.576580', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.02650618925690651, 'timestamp': '2025-09-10 02:27:43.577987', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.606257', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.043255094438791275, 'timestamp': '2025-09-10 02:27:43.608041', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:43.636593', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.03896671161055565, 'timestamp': '2025-09-10 02:27:43.659738', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.688092', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.013624775223433971, 'timestamp': '2025-09-10 02:27:43.689711', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.718571', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.039595868438482285, 'timestamp': '2025-09-10 02:27:43.720447', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.749713', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.031632862985134125, 'timestamp': '2025-09-10 02:27:43.751347', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.779932', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.03027370758354664, 'timestamp': '2025-09-10 02:27:43.802963', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.831392', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.03935625031590462, 'timestamp': '2025-09-10 02:27:43.833006', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.861948', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.0709146186709404, 'timestamp': '2025-09-10 02:27:43.863473', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.892693', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.04960712417960167, 'timestamp': '2025-09-10 02:27:43.894176', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.923086', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.022915851324796677, 'timestamp': '2025-09-10 02:27:43.946113', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:43.975367', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.016043413430452347, 'timestamp': '2025-09-10 02:27:43.978073', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.010444', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.017392989248037338, 'timestamp': '2025-09-10 02:27:44.012073', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:44.041207', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.060260094702243805, 'timestamp': '2025-09-10 02:27:44.042762', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.071066', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.058866389095783234, 'timestamp': '2025-09-10 02:27:44.093964', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.123024', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.014653525315225124, 'timestamp': '2025-09-10 02:27:44.124436', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.152603', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.03627878054976463, 'timestamp': '2025-09-10 02:27:44.154244', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.182972', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.03967222198843956, 'timestamp': '2025-09-10 02:27:44.185016', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:44.213630', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.048887427896261215, 'timestamp': '2025-09-10 02:27:44.236990', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.265542', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.037526946514844894, 'timestamp': '2025-09-10 02:27:44.267370', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.296660', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.00783440750092268, 'timestamp': '2025-09-10 02:27:44.298230', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.326724', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.011957625858485699, 'timestamp': '2025-09-10 02:27:44.328413', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.357216', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.03405969962477684, 'timestamp': '2025-09-10 02:27:44.380626', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.409404', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.011136604472994804, 'timestamp': '2025-09-10 02:27:44.411003', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.439355', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.036693062633275986, 'timestamp': '2025-09-10 02:27:44.441105', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:44.469826', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.021266072988510132, 'timestamp': '2025-09-10 02:27:44.471433', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.500192', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.01566631905734539, 'timestamp': '2025-09-10 02:27:44.523117', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.551965', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.04040166735649109, 'timestamp': '2025-09-10 02:27:44.553651', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.582469', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.0320776030421257, 'timestamp': '2025-09-10 02:27:44.584161', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.612507', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.011080854572355747, 'timestamp': '2025-09-10 02:27:44.614260', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.642963', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.019626542925834656, 'timestamp': '2025-09-10 02:27:44.666041', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.694693', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.019304517656564713, 'timestamp': '2025-09-10 02:27:44.696296', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.725577', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.024365395307540894, 'timestamp': '2025-09-10 02:27:44.727086', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.755270', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.04958360642194748, 'timestamp': '2025-09-10 02:27:44.757053', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.785805', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.037695057690143585, 'timestamp': '2025-09-10 02:27:44.808840', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.837497', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.019487785175442696, 'timestamp': '2025-09-10 02:27:44.838999', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:44.866958', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.017532160505652428, 'timestamp': '2025-09-10 02:27:44.868474', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.896893', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.030309343710541725, 'timestamp': '2025-09-10 02:27:44.898260', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.926284', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.05629602074623108, 'timestamp': '2025-09-10 02:27:44.949480', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:44.977975', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.022461825981736183, 'timestamp': '2025-09-10 02:27:44.979529', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.008306', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.02396000362932682, 'timestamp': '2025-09-10 02:27:45.009678', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.037720', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.02895105816423893, 'timestamp': '2025-09-10 02:27:45.039279', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.067575', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.01280116755515337, 'timestamp': '2025-09-10 02:27:45.090333', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.118525', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.07098526507616043, 'timestamp': '2025-09-10 02:27:45.119954', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:45.148605', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.016890021041035652, 'timestamp': '2025-09-10 02:27:45.150250', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.179562', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.02893208898603916, 'timestamp': '2025-09-10 02:27:45.181092', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.209545', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.04043635353446007, 'timestamp': '2025-09-10 02:27:45.232799', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.261819', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.03076103888452053, 'timestamp': '2025-09-10 02:27:45.263582', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.292153', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.01457404438406229, 'timestamp': '2025-09-10 02:27:45.293799', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:45.321999', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.032336682081222534, 'timestamp': '2025-09-10 02:27:45.323653', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.351963', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.04353252425789833, 'timestamp': '2025-09-10 02:27:45.375309', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:45.403839', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.01743830367922783, 'timestamp': '2025-09-10 02:27:45.405345', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.434321', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.06116855517029762, 'timestamp': '2025-09-10 02:27:45.435856', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.464112', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.02149762213230133, 'timestamp': '2025-09-10 02:27:45.465691', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.493957', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.023374240845441818, 'timestamp': '2025-09-10 02:27:45.517054', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.545705', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.0031735931988805532, 'timestamp': '2025-09-10 02:27:45.547084', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:45.575619', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.01768544688820839, 'timestamp': '2025-09-10 02:27:45.577131', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.605225', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.02468477189540863, 'timestamp': '2025-09-10 02:27:45.606744', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:45.634987', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.02794378623366356, 'timestamp': '2025-09-10 02:27:45.659175', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:45.688159', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.004905772395431995, 'timestamp': '2025-09-10 02:27:45.689583', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.718566', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.04774912819266319, 'timestamp': '2025-09-10 02:27:45.720076', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.749082', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.005234159994870424, 'timestamp': '2025-09-10 02:27:45.750935', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:45.779850', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.043323665857315063, 'timestamp': '2025-09-10 02:27:45.803094', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:27:47.722743', 'step': 1672, 'epoch': 1} {'type': 'pplx', 'content': 2764803.1894251006, 'timestamp': '2025-09-10 02:27:47.724332', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:47.752431', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.04397788271307945, 'timestamp': '2025-09-10 02:27:47.754068', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:47.782317', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.03467138856649399, 'timestamp': '2025-09-10 02:27:47.783891', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:47.814497', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.009997214190661907, 'timestamp': '2025-09-10 02:27:47.817057', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:47.845456', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.027228351682424545, 'timestamp': '2025-09-10 02:27:47.868487', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:47.897179', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.020021233707666397, 'timestamp': '2025-09-10 02:27:47.898665', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:47.926601', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.02924380823969841, 'timestamp': '2025-09-10 02:27:47.928228', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:47.956528', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.030104586854577065, 'timestamp': '2025-09-10 02:27:47.958021', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:47.986109', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.031224578619003296, 'timestamp': '2025-09-10 02:27:48.009429', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.038089', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.022594813257455826, 'timestamp': '2025-09-10 02:27:48.039667', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:48.068067', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.028602972626686096, 'timestamp': '2025-09-10 02:27:48.069465', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.098138', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.01706654205918312, 'timestamp': '2025-09-10 02:27:48.099601', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.127855', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.00787374284118414, 'timestamp': '2025-09-10 02:27:48.150975', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.179784', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.029157137498259544, 'timestamp': '2025-09-10 02:27:48.181217', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.209312', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.0034372094087302685, 'timestamp': '2025-09-10 02:27:48.211067', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.239376', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.01937475986778736, 'timestamp': '2025-09-10 02:27:48.240796', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.268797', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.041939932852983475, 'timestamp': '2025-09-10 02:27:48.292002', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:48.320677', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.02188614383339882, 'timestamp': '2025-09-10 02:27:48.323506', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:48.355816', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.027482228353619576, 'timestamp': '2025-09-10 02:27:48.357157', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.385605', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.03252260759472847, 'timestamp': '2025-09-10 02:27:48.387315', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.415723', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.06514390558004379, 'timestamp': '2025-09-10 02:27:48.440111', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:48.469992', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.02514522336423397, 'timestamp': '2025-09-10 02:27:48.471495', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.500726', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.005847656633704901, 'timestamp': '2025-09-10 02:27:48.503319', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.532798', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.03883161023259163, 'timestamp': '2025-09-10 02:27:48.534584', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:48.563253', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.007636170368641615, 'timestamp': '2025-09-10 02:27:48.586464', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:48.615800', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.022910278290510178, 'timestamp': '2025-09-10 02:27:48.617437', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.646120', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.030868876725435257, 'timestamp': '2025-09-10 02:27:48.647825', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.676971', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.031839769333601, 'timestamp': '2025-09-10 02:27:48.678378', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.707246', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.050692055374383926, 'timestamp': '2025-09-10 02:27:48.730366', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.759234', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.04352886602282524, 'timestamp': '2025-09-10 02:27:48.760993', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.790031', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.036653898656368256, 'timestamp': '2025-09-10 02:27:48.791430', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.819724', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.06488614529371262, 'timestamp': '2025-09-10 02:27:48.821457', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.850112', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.012786167673766613, 'timestamp': '2025-09-10 02:27:48.873196', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.901970', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.043236348778009415, 'timestamp': '2025-09-10 02:27:48.903382', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.931792', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.02485862746834755, 'timestamp': '2025-09-10 02:27:48.933431', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:48.962474', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.05799518898129463, 'timestamp': '2025-09-10 02:27:48.963975', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:48.992520', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.01807405613362789, 'timestamp': '2025-09-10 02:27:49.015700', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.044885', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.008302552625536919, 'timestamp': '2025-09-10 02:27:49.046323', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.075157', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.014733013696968555, 'timestamp': '2025-09-10 02:27:49.076992', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:49.105626', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.06290986388921738, 'timestamp': '2025-09-10 02:27:49.107098', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:49.135687', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.0030225724913179874, 'timestamp': '2025-09-10 02:27:49.158848', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:49.187977', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.009493774734437466, 'timestamp': '2025-09-10 02:27:49.189443', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.218142', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.0634767934679985, 'timestamp': '2025-09-10 02:27:49.219855', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.248400', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.045059945434331894, 'timestamp': '2025-09-10 02:27:49.250064', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.278627', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.009607000276446342, 'timestamp': '2025-09-10 02:27:49.301556', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:49.330777', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.02127017080783844, 'timestamp': '2025-09-10 02:27:49.332219', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.360819', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.05764401704072952, 'timestamp': '2025-09-10 02:27:49.362490', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.390787', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.06865488737821579, 'timestamp': '2025-09-10 02:27:49.392270', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.421124', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.05024939775466919, 'timestamp': '2025-09-10 02:27:49.444316', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.473616', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.022084476426243782, 'timestamp': '2025-09-10 02:27:49.475276', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:49.504609', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.038758330047130585, 'timestamp': '2025-09-10 02:27:49.507113', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.537841', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.04677200689911842, 'timestamp': '2025-09-10 02:27:49.539490', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.568801', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.01305408775806427, 'timestamp': '2025-09-10 02:27:49.591856', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.620897', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.012123716995120049, 'timestamp': '2025-09-10 02:27:49.623010', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:49.652003', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.04148506000638008, 'timestamp': '2025-09-10 02:27:49.653670', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:49.682319', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.010827691294252872, 'timestamp': '2025-09-10 02:27:49.683948', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.712397', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.04800831526517868, 'timestamp': '2025-09-10 02:27:49.735690', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.764917', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.01778292842209339, 'timestamp': '2025-09-10 02:27:49.766504', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.795146', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.03920115903019905, 'timestamp': '2025-09-10 02:27:49.796779', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.825249', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.018084228038787842, 'timestamp': '2025-09-10 02:27:49.826880', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.856238', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.04701170325279236, 'timestamp': '2025-09-10 02:27:49.879640', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:49.908641', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.013462777249515057, 'timestamp': '2025-09-10 02:27:49.910241', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:49.938941', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.0071736471727490425, 'timestamp': '2025-09-10 02:27:49.940544', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:49.968959', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.030766667798161507, 'timestamp': '2025-09-10 02:27:49.971789', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.001703', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.054909367114305496, 'timestamp': '2025-09-10 02:27:50.025043', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:50.054108', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.03182930126786232, 'timestamp': '2025-09-10 02:27:50.055971', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.085056', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.05137627571821213, 'timestamp': '2025-09-10 02:27:50.086958', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.115597', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.056904930621385574, 'timestamp': '2025-09-10 02:27:50.117207', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.145728', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.006250171922147274, 'timestamp': '2025-09-10 02:27:50.168861', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:50.197439', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.03876912593841553, 'timestamp': '2025-09-10 02:27:50.199040', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.227679', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.014502237550914288, 'timestamp': '2025-09-10 02:27:50.229234', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.257954', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.01644100435078144, 'timestamp': '2025-09-10 02:27:50.259506', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.288121', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.0441061295568943, 'timestamp': '2025-09-10 02:27:50.311067', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.340038', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.005406923592090607, 'timestamp': '2025-09-10 02:27:50.341728', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.370447', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.01910267397761345, 'timestamp': '2025-09-10 02:27:50.371945', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.400075', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.04482027888298035, 'timestamp': '2025-09-10 02:27:50.401461', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.429669', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.05291486158967018, 'timestamp': '2025-09-10 02:27:50.452750', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.481527', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.008437646552920341, 'timestamp': '2025-09-10 02:27:50.483234', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.511909', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.04366893693804741, 'timestamp': '2025-09-10 02:27:50.513503', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.542158', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.02228725515305996, 'timestamp': '2025-09-10 02:27:50.543781', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:50.572903', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.02205096371471882, 'timestamp': '2025-09-10 02:27:50.595986', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.625500', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.02804812788963318, 'timestamp': '2025-09-10 02:27:50.627233', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.655999', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.043668944388628006, 'timestamp': '2025-09-10 02:27:50.657822', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.686459', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.03522314503788948, 'timestamp': '2025-09-10 02:27:50.688046', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.716668', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.007187033537775278, 'timestamp': '2025-09-10 02:27:50.739859', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.768181', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.048044055700302124, 'timestamp': '2025-09-10 02:27:50.769532', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.798599', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.023681072518229485, 'timestamp': '2025-09-10 02:27:50.799979', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:50.828197', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.043920863419771194, 'timestamp': '2025-09-10 02:27:50.829783', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.859057', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.015966571867465973, 'timestamp': '2025-09-10 02:27:50.882053', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.911058', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.024546165019273758, 'timestamp': '2025-09-10 02:27:50.912429', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.941299', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.03937734290957451, 'timestamp': '2025-09-10 02:27:50.943017', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:50.972033', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.015845147892832756, 'timestamp': '2025-09-10 02:27:50.974035', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.002798', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.02144877426326275, 'timestamp': '2025-09-10 02:27:51.026094', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.054897', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.019191885367035866, 'timestamp': '2025-09-10 02:27:51.056748', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:51.084970', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.023099040612578392, 'timestamp': '2025-09-10 02:27:51.086658', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.115093', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.022091463208198547, 'timestamp': '2025-09-10 02:27:51.116947', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.145495', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.018582088872790337, 'timestamp': '2025-09-10 02:27:51.168999', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.197556', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.02827291376888752, 'timestamp': '2025-09-10 02:27:51.199126', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.227635', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.03188592568039894, 'timestamp': '2025-09-10 02:27:51.229246', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.258014', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.03314082697033882, 'timestamp': '2025-09-10 02:27:51.259780', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.288468', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.022767921909689903, 'timestamp': '2025-09-10 02:27:51.311705', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.340467', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.03807010129094124, 'timestamp': '2025-09-10 02:27:51.342367', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.370850', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.04123767092823982, 'timestamp': '2025-09-10 02:27:51.372394', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.401036', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.024406859651207924, 'timestamp': '2025-09-10 02:27:51.402413', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:51.430962', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.019260989502072334, 'timestamp': '2025-09-10 02:27:51.455116', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.484089', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.014728260226547718, 'timestamp': '2025-09-10 02:27:51.485497', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.514367', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.016539910808205605, 'timestamp': '2025-09-10 02:27:51.515779', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.544624', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.048096511512994766, 'timestamp': '2025-09-10 02:27:51.546124', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.574792', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.0534992590546608, 'timestamp': '2025-09-10 02:27:51.597820', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.626356', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.00945852417498827, 'timestamp': '2025-09-10 02:27:51.628257', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.657167', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.027953948825597763, 'timestamp': '2025-09-10 02:27:51.659144', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.687641', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.03516707941889763, 'timestamp': '2025-09-10 02:27:51.689466', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.717812', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.033107005059719086, 'timestamp': '2025-09-10 02:27:51.741063', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:51.770018', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.0065910592675209045, 'timestamp': '2025-09-10 02:27:51.771652', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:51.799944', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.032838717103004456, 'timestamp': '2025-09-10 02:27:51.801570', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.830060', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.0056396001018583775, 'timestamp': '2025-09-10 02:27:51.831656', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.860033', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.00763358548283577, 'timestamp': '2025-09-10 02:27:51.883250', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.911800', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.0351736806333065, 'timestamp': '2025-09-10 02:27:51.913371', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:51.942061', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.023263007402420044, 'timestamp': '2025-09-10 02:27:51.943778', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:51.972170', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.014233228750526905, 'timestamp': '2025-09-10 02:27:51.973824', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.002715', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.012082560919225216, 'timestamp': '2025-09-10 02:27:52.025890', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.054958', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.03389746695756912, 'timestamp': '2025-09-10 02:27:52.056834', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.092779', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.021390317007899284, 'timestamp': '2025-09-10 02:27:52.094305', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.123961', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.011533044278621674, 'timestamp': '2025-09-10 02:27:52.125544', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.153627', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.04315054789185524, 'timestamp': '2025-09-10 02:27:52.177096', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.205618', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.04506099969148636, 'timestamp': '2025-09-10 02:27:52.207112', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.236476', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.023977842181921005, 'timestamp': '2025-09-10 02:27:52.238015', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.266445', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.04149536415934563, 'timestamp': '2025-09-10 02:27:52.267822', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.296197', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.016618778929114342, 'timestamp': '2025-09-10 02:27:52.319183', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.347810', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.028144029900431633, 'timestamp': '2025-09-10 02:27:52.349098', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.377491', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.006141290534287691, 'timestamp': '2025-09-10 02:27:52.378890', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.406995', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.013461337424814701, 'timestamp': '2025-09-10 02:27:52.408834', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:52.436730', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.015965865924954414, 'timestamp': '2025-09-10 02:27:52.460006', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:52.488941', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.02222426049411297, 'timestamp': '2025-09-10 02:27:52.490695', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.519093', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.02876662276685238, 'timestamp': '2025-09-10 02:27:52.520877', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.549576', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.0073358905501663685, 'timestamp': '2025-09-10 02:27:52.551237', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.579334', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.09105312824249268, 'timestamp': '2025-09-10 02:27:52.602652', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.630955', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.03719841688871384, 'timestamp': '2025-09-10 02:27:52.632810', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.661080', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.009422372095286846, 'timestamp': '2025-09-10 02:27:52.662485', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.691205', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.045731209218502045, 'timestamp': '2025-09-10 02:27:52.692998', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.721224', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.03382372483611107, 'timestamp': '2025-09-10 02:27:52.744539', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.773218', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.01654665358364582, 'timestamp': '2025-09-10 02:27:52.774861', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.803448', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.07755956798791885, 'timestamp': '2025-09-10 02:27:52.805141', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.833581', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.09635764360427856, 'timestamp': '2025-09-10 02:27:52.835169', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.863702', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.013378841802477837, 'timestamp': '2025-09-10 02:27:52.886950', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.916031', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.023112650960683823, 'timestamp': '2025-09-10 02:27:52.917431', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:52.945574', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.018076254054903984, 'timestamp': '2025-09-10 02:27:52.947053', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:52.975556', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.010910548269748688, 'timestamp': '2025-09-10 02:27:52.977176', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:53.005951', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.028995469212532043, 'timestamp': '2025-09-10 02:27:53.029178', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:53.057949', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.05423533916473389, 'timestamp': '2025-09-10 02:27:53.059432', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:53.087928', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.016812141984701157, 'timestamp': '2025-09-10 02:27:53.089337', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:53.117903', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.004353975411504507, 'timestamp': '2025-09-10 02:27:53.119348', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:53.148327', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.02070428803563118, 'timestamp': '2025-09-10 02:27:53.171449', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:27:55.044426', 'step': 1824, 'epoch': 1} {'type': 'pplx', 'content': 2774156.6199907036, 'timestamp': '2025-09-10 02:27:55.046119', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:55.074027', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.04381697624921799, 'timestamp': '2025-09-10 02:27:55.075721', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.104334', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.015221195295453072, 'timestamp': '2025-09-10 02:27:55.111881', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.142339', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.018304746598005295, 'timestamp': '2025-09-10 02:27:55.143992', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.172284', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.005829904694110155, 'timestamp': '2025-09-10 02:27:55.195380', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.226280', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.08773591369390488, 'timestamp': '2025-09-10 02:27:55.227891', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.256005', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.06778901070356369, 'timestamp': '2025-09-10 02:27:55.259664', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.291290', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.01809806562960148, 'timestamp': '2025-09-10 02:27:55.293003', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.320927', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.035624559968709946, 'timestamp': '2025-09-10 02:27:55.343892', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.372533', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.02435780130326748, 'timestamp': '2025-09-10 02:27:55.374215', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.402562', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.028359994292259216, 'timestamp': '2025-09-10 02:27:55.405247', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.436295', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.01486204657703638, 'timestamp': '2025-09-10 02:27:55.438308', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.467550', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.013306444510817528, 'timestamp': '2025-09-10 02:27:55.490808', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.519785', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.021247971802949905, 'timestamp': '2025-09-10 02:27:55.521520', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.549764', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.012678244151175022, 'timestamp': '2025-09-10 02:27:55.551521', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.580482', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.07287485152482986, 'timestamp': '2025-09-10 02:27:55.582110', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.611036', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.014401237480342388, 'timestamp': '2025-09-10 02:27:55.634267', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.662744', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.037539489567279816, 'timestamp': '2025-09-10 02:27:55.664501', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.693288', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.032391078770160675, 'timestamp': '2025-09-10 02:27:55.694975', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:55.723259', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.005160854198038578, 'timestamp': '2025-09-10 02:27:55.725047', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.753635', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.03755679354071617, 'timestamp': '2025-09-10 02:27:55.776834', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.805580', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.06378152221441269, 'timestamp': '2025-09-10 02:27:55.807021', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.835711', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.008414468728005886, 'timestamp': '2025-09-10 02:27:55.837411', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.866195', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.04518602043390274, 'timestamp': '2025-09-10 02:27:55.867807', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:55.896849', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.03599895164370537, 'timestamp': '2025-09-10 02:27:55.920171', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:55.948396', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.03463702276349068, 'timestamp': '2025-09-10 02:27:55.949980', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:55.977963', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.05188743397593498, 'timestamp': '2025-09-10 02:27:55.979559', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.008450', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.04317900538444519, 'timestamp': '2025-09-10 02:27:56.009918', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.038346', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.03403066471219063, 'timestamp': '2025-09-10 02:27:56.061331', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.089500', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.016021737828850746, 'timestamp': '2025-09-10 02:27:56.091131', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.119634', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.053274061530828476, 'timestamp': '2025-09-10 02:27:56.121249', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.149369', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.027471955865621567, 'timestamp': '2025-09-10 02:27:56.151139', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.179492', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.03777465969324112, 'timestamp': '2025-09-10 02:27:56.202586', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.231535', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.01911143772304058, 'timestamp': '2025-09-10 02:27:56.233235', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.261914', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.008062859997153282, 'timestamp': '2025-09-10 02:27:56.263870', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.292323', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.03436744958162308, 'timestamp': '2025-09-10 02:27:56.294145', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.322601', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.00785265862941742, 'timestamp': '2025-09-10 02:27:56.345787', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.374393', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.05875316634774208, 'timestamp': '2025-09-10 02:27:56.376141', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.404486', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.014822714030742645, 'timestamp': '2025-09-10 02:27:56.406234', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.434078', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.07236051559448242, 'timestamp': '2025-09-10 02:27:56.436222', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.464640', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.029560577124357224, 'timestamp': '2025-09-10 02:27:56.487937', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.516429', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.04660656675696373, 'timestamp': '2025-09-10 02:27:56.518046', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.546153', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.0362875834107399, 'timestamp': '2025-09-10 02:27:56.548051', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.576504', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.011777542531490326, 'timestamp': '2025-09-10 02:27:56.578155', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.606576', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.022310929372906685, 'timestamp': '2025-09-10 02:27:56.629552', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.658112', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.02191806770861149, 'timestamp': '2025-09-10 02:27:56.659698', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.687668', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.036225225776433945, 'timestamp': '2025-09-10 02:27:56.689096', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.717260', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.013109169900417328, 'timestamp': '2025-09-10 02:27:56.718550', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:56.746228', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.02598562464118004, 'timestamp': '2025-09-10 02:27:56.769271', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.797699', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.046570923179388046, 'timestamp': '2025-09-10 02:27:56.799236', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.827812', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.023213813081383705, 'timestamp': '2025-09-10 02:27:56.829290', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:56.857533', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.016777101904153824, 'timestamp': '2025-09-10 02:27:56.859059', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:56.887138', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.017548279836773872, 'timestamp': '2025-09-10 02:27:56.910147', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.937931', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.013116302900016308, 'timestamp': '2025-09-10 02:27:56.939598', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:56.967669', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.04197695478796959, 'timestamp': '2025-09-10 02:27:56.969306', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:56.997603', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.021345878019928932, 'timestamp': '2025-09-10 02:27:56.999307', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.027495', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.021132279187440872, 'timestamp': '2025-09-10 02:27:57.050416', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.078538', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.018463974818587303, 'timestamp': '2025-09-10 02:27:57.079967', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.108136', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.009572668001055717, 'timestamp': '2025-09-10 02:27:57.109593', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.137622', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.026923833414912224, 'timestamp': '2025-09-10 02:27:57.139341', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.167719', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.015519418753683567, 'timestamp': '2025-09-10 02:27:57.190900', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.219265', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.03241514042019844, 'timestamp': '2025-09-10 02:27:57.220852', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.248956', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.013597256503999233, 'timestamp': '2025-09-10 02:27:57.250464', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.278928', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.04358609393239021, 'timestamp': '2025-09-10 02:27:57.280393', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:57.308394', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.040268637239933014, 'timestamp': '2025-09-10 02:27:57.331592', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.359754', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.05240333825349808, 'timestamp': '2025-09-10 02:27:57.361378', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.389827', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.007719197776168585, 'timestamp': '2025-09-10 02:27:57.391255', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:57.419479', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.03898076340556145, 'timestamp': '2025-09-10 02:27:57.420754', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.448447', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.06327030807733536, 'timestamp': '2025-09-10 02:27:57.471573', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.500236', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.018281977623701096, 'timestamp': '2025-09-10 02:27:57.501908', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.530238', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.03632773086428642, 'timestamp': '2025-09-10 02:27:57.531973', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.559938', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.013442902825772762, 'timestamp': '2025-09-10 02:27:57.561377', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.589213', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.0278884656727314, 'timestamp': '2025-09-10 02:27:57.612139', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.639805', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.021757736802101135, 'timestamp': '2025-09-10 02:27:57.641239', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.669692', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.015394957736134529, 'timestamp': '2025-09-10 02:27:57.671313', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.699685', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.046225737780332565, 'timestamp': '2025-09-10 02:27:57.701354', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.730185', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.026236888021230698, 'timestamp': '2025-09-10 02:27:57.753285', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.781986', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.04182019457221031, 'timestamp': '2025-09-10 02:27:57.783790', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.811985', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.04555434733629227, 'timestamp': '2025-09-10 02:27:57.813453', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.841323', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.015747955068945885, 'timestamp': '2025-09-10 02:27:57.842793', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:27:57.870995', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.01313038356602192, 'timestamp': '2025-09-10 02:27:57.893997', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.922386', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.02328430488705635, 'timestamp': '2025-09-10 02:27:57.923895', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:57.951902', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.0312882624566555, 'timestamp': '2025-09-10 02:27:57.956924', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:57.985419', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.009373379871249199, 'timestamp': '2025-09-10 02:27:57.987010', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.015272', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.037006132304668427, 'timestamp': '2025-09-10 02:27:58.039093', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:58.067188', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.05433737486600876, 'timestamp': '2025-09-10 02:27:58.068758', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.096849', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.023593587800860405, 'timestamp': '2025-09-10 02:27:58.098139', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.126323', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.02516046352684498, 'timestamp': '2025-09-10 02:27:58.127701', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.156830', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.017987674102187157, 'timestamp': '2025-09-10 02:27:58.179821', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.207989', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.015304679982364178, 'timestamp': '2025-09-10 02:27:58.209568', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.237308', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.007738722953945398, 'timestamp': '2025-09-10 02:27:58.238958', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.267360', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.013014032505452633, 'timestamp': '2025-09-10 02:27:58.268758', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.296747', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.04582571983337402, 'timestamp': '2025-09-10 02:27:58.322292', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.350059', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.013117613270878792, 'timestamp': '2025-09-10 02:27:58.351489', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.381128', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.004100994672626257, 'timestamp': '2025-09-10 02:27:58.382574', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.410602', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.020396525040268898, 'timestamp': '2025-09-10 02:27:58.411818', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.439879', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.011074303649365902, 'timestamp': '2025-09-10 02:27:58.462922', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.491566', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.05970010533928871, 'timestamp': '2025-09-10 02:27:58.493194', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:58.521290', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.032971110194921494, 'timestamp': '2025-09-10 02:27:58.522764', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.551138', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.07221423834562302, 'timestamp': '2025-09-10 02:27:58.552607', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.580543', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.09146525710821152, 'timestamp': '2025-09-10 02:27:58.603608', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.632006', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.029159855097532272, 'timestamp': '2025-09-10 02:27:58.633460', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.661485', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.013702225871384144, 'timestamp': '2025-09-10 02:27:58.666154', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.702390', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.0874093621969223, 'timestamp': '2025-09-10 02:27:58.703827', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.738075', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.03918251767754555, 'timestamp': '2025-09-10 02:27:58.761087', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:58.789361', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.05587069317698479, 'timestamp': '2025-09-10 02:27:58.790904', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.829615', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.0371037982404232, 'timestamp': '2025-09-10 02:27:58.833946', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.862607', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.048507921397686005, 'timestamp': '2025-09-10 02:27:58.864200', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.892387', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.024592606350779533, 'timestamp': '2025-09-10 02:27:58.915377', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.943879', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.008353027515113354, 'timestamp': '2025-09-10 02:27:58.945380', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:58.974956', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.03610652685165405, 'timestamp': '2025-09-10 02:27:58.976610', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.004927', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.04270109534263611, 'timestamp': '2025-09-10 02:27:59.006667', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.034713', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.09751912206411362, 'timestamp': '2025-09-10 02:27:59.057936', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.086197', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.025234397500753403, 'timestamp': '2025-09-10 02:27:59.088059', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.115999', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.018462875857949257, 'timestamp': '2025-09-10 02:27:59.117593', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.145324', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.056947946548461914, 'timestamp': '2025-09-10 02:27:59.146835', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.174887', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.04623355343937874, 'timestamp': '2025-09-10 02:27:59.197854', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.226184', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.01078362949192524, 'timestamp': '2025-09-10 02:27:59.227447', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.255446', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.059156108647584915, 'timestamp': '2025-09-10 02:27:59.256934', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.285262', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.030599039047956467, 'timestamp': '2025-09-10 02:27:59.287771', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.320679', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.017089087516069412, 'timestamp': '2025-09-10 02:27:59.343468', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.371557', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.04193213954567909, 'timestamp': '2025-09-10 02:27:59.372992', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.400821', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.028696786612272263, 'timestamp': '2025-09-10 02:27:59.402541', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.430975', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.015457267872989178, 'timestamp': '2025-09-10 02:27:59.432572', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.460720', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.05320663005113602, 'timestamp': '2025-09-10 02:27:59.483802', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.512074', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.014334471896290779, 'timestamp': '2025-09-10 02:27:59.513497', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.541383', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.021078793331980705, 'timestamp': '2025-09-10 02:27:59.542952', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.571261', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.04999444633722305, 'timestamp': '2025-09-10 02:27:59.572992', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.601354', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.02752426452934742, 'timestamp': '2025-09-10 02:27:59.624668', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.653011', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.019825072959065437, 'timestamp': '2025-09-10 02:27:59.654564', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.682549', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.02840333990752697, 'timestamp': '2025-09-10 02:27:59.684830', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:27:59.715603', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.052610281854867935, 'timestamp': '2025-09-10 02:27:59.719938', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.748377', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.03353895619511604, 'timestamp': '2025-09-10 02:27:59.771857', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.802452', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.02799171581864357, 'timestamp': '2025-09-10 02:27:59.804953', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.834426', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.06994835287332535, 'timestamp': '2025-09-10 02:27:59.835942', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:27:59.863918', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.019334720447659492, 'timestamp': '2025-09-10 02:27:59.865655', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.893692', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.04380827397108078, 'timestamp': '2025-09-10 02:27:59.916696', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.945266', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.0397782102227211, 'timestamp': '2025-09-10 02:27:59.946693', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:27:59.974745', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.016844620928168297, 'timestamp': '2025-09-10 02:27:59.976314', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.004609', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.027142390608787537, 'timestamp': '2025-09-10 02:28:00.006237', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:00.035147', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.030943524092435837, 'timestamp': '2025-09-10 02:28:00.058226', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.086477', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.03292815759778023, 'timestamp': '2025-09-10 02:28:00.088051', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.115960', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.050953131169080734, 'timestamp': '2025-09-10 02:28:00.117706', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.145912', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.023091718554496765, 'timestamp': '2025-09-10 02:28:00.147427', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.175591', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.021349238231778145, 'timestamp': '2025-09-10 02:28:00.198578', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.227110', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.03239194676280022, 'timestamp': '2025-09-10 02:28:00.228645', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.256590', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.029955726116895676, 'timestamp': '2025-09-10 02:28:00.258169', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.286403', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.01877775602042675, 'timestamp': '2025-09-10 02:28:00.287901', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.316128', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.025350505486130714, 'timestamp': '2025-09-10 02:28:00.339126', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.367175', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.04922991245985031, 'timestamp': '2025-09-10 02:28:00.368590', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.396631', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.02344900369644165, 'timestamp': '2025-09-10 02:28:00.398219', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.429741', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.04036623612046242, 'timestamp': '2025-09-10 02:28:00.431013', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:00.458531', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.028169851750135422, 'timestamp': '2025-09-10 02:28:00.481467', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:28:02.327132', 'step': 1976, 'epoch': 1} {'type': 'pplx', 'content': 2335998.333224945, 'timestamp': '2025-09-10 02:28:02.328617', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:02.355782', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.04719608649611473, 'timestamp': '2025-09-10 02:28:02.357301', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.386114', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.019742654636502266, 'timestamp': '2025-09-10 02:28:02.387564', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.416182', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.07820205390453339, 'timestamp': '2025-09-10 02:28:02.417893', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.445727', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.005003311205655336, 'timestamp': '2025-09-10 02:28:02.468829', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:02.497727', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.07674206793308258, 'timestamp': '2025-09-10 02:28:02.499184', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:02.527551', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.051874928176403046, 'timestamp': '2025-09-10 02:28:02.528771', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.556847', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.023504799231886864, 'timestamp': '2025-09-10 02:28:02.558449', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.586564', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.02263377234339714, 'timestamp': '2025-09-10 02:28:02.609618', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.637946', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.04072608798742294, 'timestamp': '2025-09-10 02:28:02.639382', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.667982', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.04306507483124733, 'timestamp': '2025-09-10 02:28:02.669472', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.697784', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.030287135392427444, 'timestamp': '2025-09-10 02:28:02.699318', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.727646', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.02686193212866783, 'timestamp': '2025-09-10 02:28:02.751879', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.780490', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.03202953562140465, 'timestamp': '2025-09-10 02:28:02.782452', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.810908', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.03473440185189247, 'timestamp': '2025-09-10 02:28:02.812657', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:02.841133', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.02686433121562004, 'timestamp': '2025-09-10 02:28:02.842623', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.870794', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.01151194330304861, 'timestamp': '2025-09-10 02:28:02.893884', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.922163', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.019326459616422653, 'timestamp': '2025-09-10 02:28:02.923684', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.951826', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.027592310681939125, 'timestamp': '2025-09-10 02:28:02.953218', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:02.981941', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.010238692164421082, 'timestamp': '2025-09-10 02:28:02.983361', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:03.011858', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.018648786470294, 'timestamp': '2025-09-10 02:28:03.034877', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:03.063371', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.03684840351343155, 'timestamp': '2025-09-10 02:28:03.065004', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:03.093270', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.014682809822261333, 'timestamp': '2025-09-10 02:28:03.094888', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:03.123166', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.04274337366223335, 'timestamp': '2025-09-10 02:28:03.124590', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:03.152658', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.033814895898103714, 'timestamp': '2025-09-10 02:28:03.175685', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-10 02:28:07.631862', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:07.667761', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.029487574473023415, 'timestamp': '2025-09-10 02:28:07.669629', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:07.699836', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.01706775650382042, 'timestamp': '2025-09-10 02:28:07.702279', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:07.731638', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.03300907462835312, 'timestamp': '2025-09-10 02:28:07.733524', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:07.762523', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.06733154505491257, 'timestamp': '2025-09-10 02:28:07.785798', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:07.814688', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.04184475913643837, 'timestamp': '2025-09-10 02:28:07.816496', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:07.845566', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.04564382880926132, 'timestamp': '2025-09-10 02:28:07.847163', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:07.875977', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.03412893787026405, 'timestamp': '2025-09-10 02:28:07.877695', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:07.906286', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.022876108065247536, 'timestamp': '2025-09-10 02:28:07.929760', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:07.958196', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.018554767593741417, 'timestamp': '2025-09-10 02:28:07.959958', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:07.988679', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.023886600509285927, 'timestamp': '2025-09-10 02:28:07.990056', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:08.018577', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.04853728041052818, 'timestamp': '2025-09-10 02:28:08.020218', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.048990', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.019898338243365288, 'timestamp': '2025-09-10 02:28:08.072154', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:08.100911', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.019382771104574203, 'timestamp': '2025-09-10 02:28:08.102383', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:08.130951', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.06830476224422455, 'timestamp': '2025-09-10 02:28:08.132408', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.161088', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.005032069515436888, 'timestamp': '2025-09-10 02:28:08.162516', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.190615', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.012147782370448112, 'timestamp': '2025-09-10 02:28:08.213740', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.242440', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.04396127536892891, 'timestamp': '2025-09-10 02:28:08.244171', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.272480', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.020190289244055748, 'timestamp': '2025-09-10 02:28:08.274367', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.303270', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.02030044235289097, 'timestamp': '2025-09-10 02:28:08.304980', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.333182', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.05512828379869461, 'timestamp': '2025-09-10 02:28:08.356347', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.385146', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.02449539303779602, 'timestamp': '2025-09-10 02:28:08.386855', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.415908', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.035348691046237946, 'timestamp': '2025-09-10 02:28:08.417545', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.445813', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.03864375129342079, 'timestamp': '2025-09-10 02:28:08.447387', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.475909', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.09501069784164429, 'timestamp': '2025-09-10 02:28:08.499069', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.528299', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.008634706027805805, 'timestamp': '2025-09-10 02:28:08.530118', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.558863', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.004540830384939909, 'timestamp': '2025-09-10 02:28:08.560675', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.589257', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.018655642867088318, 'timestamp': '2025-09-10 02:28:08.591198', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.620045', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.04642891511321068, 'timestamp': '2025-09-10 02:28:08.643270', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.672064', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.034627027809619904, 'timestamp': '2025-09-10 02:28:08.673847', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.702917', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.016922222450375557, 'timestamp': '2025-09-10 02:28:08.704895', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.733540', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.07418196648359299, 'timestamp': '2025-09-10 02:28:08.735381', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:08.764541', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.01996367797255516, 'timestamp': '2025-09-10 02:28:08.787835', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.816941', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.019962644204497337, 'timestamp': '2025-09-10 02:28:08.818809', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.847373', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.03800464794039726, 'timestamp': '2025-09-10 02:28:08.849170', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.877637', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.029811426997184753, 'timestamp': '2025-09-10 02:28:08.879234', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.907786', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.026991935446858406, 'timestamp': '2025-09-10 02:28:08.930858', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:08.960109', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.020711610093712807, 'timestamp': '2025-09-10 02:28:08.961683', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:08.990633', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.015253338031470776, 'timestamp': '2025-09-10 02:28:08.992313', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.020723', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.02093043550848961, 'timestamp': '2025-09-10 02:28:09.022331', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.051248', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.009021738544106483, 'timestamp': '2025-09-10 02:28:09.074144', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:09.102971', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.03510228544473648, 'timestamp': '2025-09-10 02:28:09.104680', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.133246', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.03412038832902908, 'timestamp': '2025-09-10 02:28:09.134886', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.163480', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.030756931751966476, 'timestamp': '2025-09-10 02:28:09.165215', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:09.194082', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.00732546066865325, 'timestamp': '2025-09-10 02:28:09.217456', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.246439', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.025469835847616196, 'timestamp': '2025-09-10 02:28:09.248093', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.276472', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.03232096508145332, 'timestamp': '2025-09-10 02:28:09.278311', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.307133', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.017231693491339684, 'timestamp': '2025-09-10 02:28:09.309033', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.337744', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.020491067320108414, 'timestamp': '2025-09-10 02:28:09.360873', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.389569', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.028713207691907883, 'timestamp': '2025-09-10 02:28:09.391469', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.420005', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.050764452666044235, 'timestamp': '2025-09-10 02:28:09.421886', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.450643', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.050802286714315414, 'timestamp': '2025-09-10 02:28:09.452409', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.481339', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.010211057029664516, 'timestamp': '2025-09-10 02:28:09.504347', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:09.533527', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.025202931836247444, 'timestamp': '2025-09-10 02:28:09.535162', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:09.564202', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.043125562369823456, 'timestamp': '2025-09-10 02:28:09.565687', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:09.594363', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.012664678506553173, 'timestamp': '2025-09-10 02:28:09.596134', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.625237', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.033021651208400726, 'timestamp': '2025-09-10 02:28:09.648294', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:09.676801', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.027453215792775154, 'timestamp': '2025-09-10 02:28:09.678539', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.707599', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.025804953649640083, 'timestamp': '2025-09-10 02:28:09.709304', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:09.738240', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.0035124425776302814, 'timestamp': '2025-09-10 02:28:09.740208', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.768655', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.03783496841788292, 'timestamp': '2025-09-10 02:28:09.792052', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:09.821319', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.008713445626199245, 'timestamp': '2025-09-10 02:28:09.823300', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.852668', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.026288988068699837, 'timestamp': '2025-09-10 02:28:09.854425', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:09.883916', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.022028058767318726, 'timestamp': '2025-09-10 02:28:09.885695', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:09.914300', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.0448044128715992, 'timestamp': '2025-09-10 02:28:09.937686', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.966331', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.012004708871245384, 'timestamp': '2025-09-10 02:28:09.968078', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:09.996951', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.023711347952485085, 'timestamp': '2025-09-10 02:28:09.998707', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.027736', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.025672154501080513, 'timestamp': '2025-09-10 02:28:10.029472', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.058478', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.02726186253130436, 'timestamp': '2025-09-10 02:28:10.081733', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.110792', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.031530968844890594, 'timestamp': '2025-09-10 02:28:10.112476', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:10.141926', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.019077647477388382, 'timestamp': '2025-09-10 02:28:10.143836', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.172633', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.019523994997143745, 'timestamp': '2025-09-10 02:28:10.174591', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.203537', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.03711892291903496, 'timestamp': '2025-09-10 02:28:10.226799', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.255143', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.0593751035630703, 'timestamp': '2025-09-10 02:28:10.256515', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.284797', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.023463675752282143, 'timestamp': '2025-09-10 02:28:10.286046', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:10.314292', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.01058922614902258, 'timestamp': '2025-09-10 02:28:10.316086', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.344791', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.0200516264885664, 'timestamp': '2025-09-10 02:28:10.367937', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.396622', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.01980191469192505, 'timestamp': '2025-09-10 02:28:10.398247', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.426733', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.03624066710472107, 'timestamp': '2025-09-10 02:28:10.428125', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.456192', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.0021096656564623117, 'timestamp': '2025-09-10 02:28:10.457380', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:10.486277', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.06231212615966797, 'timestamp': '2025-09-10 02:28:10.509552', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.538523', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.06326466053724289, 'timestamp': '2025-09-10 02:28:10.539980', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.568364', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.02771034464240074, 'timestamp': '2025-09-10 02:28:10.569889', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.598182', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.030195659026503563, 'timestamp': '2025-09-10 02:28:10.599804', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.628422', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.013478175736963749, 'timestamp': '2025-09-10 02:28:10.651349', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:10.680300', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.014244613237679005, 'timestamp': '2025-09-10 02:28:10.681839', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.710502', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.007503572851419449, 'timestamp': '2025-09-10 02:28:10.712098', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.740194', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.04743637517094612, 'timestamp': '2025-09-10 02:28:10.741566', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:10.769657', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.017474576830863953, 'timestamp': '2025-09-10 02:28:10.792657', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:10.821101', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.029541777446866035, 'timestamp': '2025-09-10 02:28:10.822849', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.851456', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.02945699170231819, 'timestamp': '2025-09-10 02:28:10.853323', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.882235', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.027768908068537712, 'timestamp': '2025-09-10 02:28:10.883636', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:10.911822', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.016978060826659203, 'timestamp': '2025-09-10 02:28:10.934886', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:10.963708', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.027785973623394966, 'timestamp': '2025-09-10 02:28:10.965267', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:10.993810', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.03528384491801262, 'timestamp': '2025-09-10 02:28:10.995299', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.023795', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.030607204884290695, 'timestamp': '2025-09-10 02:28:11.025203', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.053355', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.04359974339604378, 'timestamp': '2025-09-10 02:28:11.076310', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.104890', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.010347840376198292, 'timestamp': '2025-09-10 02:28:11.106445', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.135252', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.048572640866041183, 'timestamp': '2025-09-10 02:28:11.136631', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.164797', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.02608483098447323, 'timestamp': '2025-09-10 02:28:11.166461', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.195062', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.006108488887548447, 'timestamp': '2025-09-10 02:28:11.217957', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.246612', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.012660800479352474, 'timestamp': '2025-09-10 02:28:11.248218', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.276390', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.012901701964437962, 'timestamp': '2025-09-10 02:28:11.277977', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.306548', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.05675072222948074, 'timestamp': '2025-09-10 02:28:11.308198', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.336723', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.01660478673875332, 'timestamp': '2025-09-10 02:28:11.359924', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.388784', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.025345126166939735, 'timestamp': '2025-09-10 02:28:11.390690', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.419266', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.019086772575974464, 'timestamp': '2025-09-10 02:28:11.420836', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.449105', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.059554796665906906, 'timestamp': '2025-09-10 02:28:11.450308', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:11.478819', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.009006127715110779, 'timestamp': '2025-09-10 02:28:11.502164', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.530784', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.04445163160562515, 'timestamp': '2025-09-10 02:28:11.532035', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.560194', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.03971460089087486, 'timestamp': '2025-09-10 02:28:11.561679', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.590148', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.0253768227994442, 'timestamp': '2025-09-10 02:28:11.591758', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.620088', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.040283214300870895, 'timestamp': '2025-09-10 02:28:11.643139', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:11.671564', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.010990321636199951, 'timestamp': '2025-09-10 02:28:11.673005', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.701603', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.013297955505549908, 'timestamp': '2025-09-10 02:28:11.703200', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.731988', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.023308461531996727, 'timestamp': '2025-09-10 02:28:11.733659', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.762253', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.06439714133739471, 'timestamp': '2025-09-10 02:28:11.785204', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.813957', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.010951301082968712, 'timestamp': '2025-09-10 02:28:11.815631', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:11.844010', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.008319989778101444, 'timestamp': '2025-09-10 02:28:11.845547', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.873815', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.03934324532747269, 'timestamp': '2025-09-10 02:28:11.875466', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.903971', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.04784102365374565, 'timestamp': '2025-09-10 02:28:11.927279', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:11.955591', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.040347445756196976, 'timestamp': '2025-09-10 02:28:11.957131', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:11.985694', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.008014153689146042, 'timestamp': '2025-09-10 02:28:11.987072', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.015545', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.052506666630506516, 'timestamp': '2025-09-10 02:28:12.016943', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.044970', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.011240340769290924, 'timestamp': '2025-09-10 02:28:12.068151', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.096596', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.010675467550754547, 'timestamp': '2025-09-10 02:28:12.098235', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.126842', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.006910696625709534, 'timestamp': '2025-09-10 02:28:12.128405', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.157267', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.00909285806119442, 'timestamp': '2025-09-10 02:28:12.158680', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:12.187211', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.01328167226165533, 'timestamp': '2025-09-10 02:28:12.210113', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:28:14.066829', 'step': 2128, 'epoch': 1} {'type': 'pplx', 'content': 2282446.507026665, 'timestamp': '2025-09-10 02:28:14.068360', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:14.095523', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.06975377351045609, 'timestamp': '2025-09-10 02:28:14.097186', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.125963', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.010947045870125294, 'timestamp': '2025-09-10 02:28:14.127381', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.156648', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.006454108748584986, 'timestamp': '2025-09-10 02:28:14.158172', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:14.186675', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.03406180068850517, 'timestamp': '2025-09-10 02:28:14.209765', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.238570', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.03409416601061821, 'timestamp': '2025-09-10 02:28:14.239945', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:14.268024', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.014770114794373512, 'timestamp': '2025-09-10 02:28:14.269418', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.298132', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.047664035111665726, 'timestamp': '2025-09-10 02:28:14.299551', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:14.327775', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.015450273640453815, 'timestamp': '2025-09-10 02:28:14.350895', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.380306', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.00842917338013649, 'timestamp': '2025-09-10 02:28:14.382027', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.410073', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.023178046569228172, 'timestamp': '2025-09-10 02:28:14.412117', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.440609', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.025276973843574524, 'timestamp': '2025-09-10 02:28:14.442238', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.470989', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.0542147234082222, 'timestamp': '2025-09-10 02:28:14.494224', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.522791', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.04216999188065529, 'timestamp': '2025-09-10 02:28:14.524452', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.552879', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.01685541495680809, 'timestamp': '2025-09-10 02:28:14.554592', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:14.583315', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.011981974355876446, 'timestamp': '2025-09-10 02:28:14.585082', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.613937', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.012850151397287846, 'timestamp': '2025-09-10 02:28:14.636827', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.665470', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.01151515543460846, 'timestamp': '2025-09-10 02:28:14.666970', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.695149', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.022794952616095543, 'timestamp': '2025-09-10 02:28:14.696729', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.724795', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.03026963397860527, 'timestamp': '2025-09-10 02:28:14.726416', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.754918', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.02113904245197773, 'timestamp': '2025-09-10 02:28:14.777810', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.806391', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.03410894051194191, 'timestamp': '2025-09-10 02:28:14.808025', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.836256', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.027280699461698532, 'timestamp': '2025-09-10 02:28:14.837626', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:14.865692', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.038459666073322296, 'timestamp': '2025-09-10 02:28:14.867182', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.895659', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.02917073294520378, 'timestamp': '2025-09-10 02:28:14.918680', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.947785', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.011971733532845974, 'timestamp': '2025-09-10 02:28:14.949302', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:14.977470', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.04599002003669739, 'timestamp': '2025-09-10 02:28:14.979151', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.007861', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.07949955016374588, 'timestamp': '2025-09-10 02:28:15.009499', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.037708', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.052912481129169464, 'timestamp': '2025-09-10 02:28:15.060939', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.089602', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.03489892557263374, 'timestamp': '2025-09-10 02:28:15.091298', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.120021', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.02402343414723873, 'timestamp': '2025-09-10 02:28:15.121598', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.149930', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.038121890276670456, 'timestamp': '2025-09-10 02:28:15.151364', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.180789', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.007009260356426239, 'timestamp': '2025-09-10 02:28:15.203994', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.232957', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.023040596395730972, 'timestamp': '2025-09-10 02:28:15.234612', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.262977', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.025174299255013466, 'timestamp': '2025-09-10 02:28:15.264440', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.293054', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.01304810680449009, 'timestamp': '2025-09-10 02:28:15.294485', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.322604', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.002673085778951645, 'timestamp': '2025-09-10 02:28:15.345605', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.374156', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.026758363470435143, 'timestamp': '2025-09-10 02:28:15.375557', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.403925', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.04663803428411484, 'timestamp': '2025-09-10 02:28:15.405442', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.433661', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.033424012362957, 'timestamp': '2025-09-10 02:28:15.435333', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.463712', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.0035018210764974356, 'timestamp': '2025-09-10 02:28:15.486713', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.515068', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.0530601367354393, 'timestamp': '2025-09-10 02:28:15.516557', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.544791', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.01699506677687168, 'timestamp': '2025-09-10 02:28:15.546428', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.574960', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.009327538311481476, 'timestamp': '2025-09-10 02:28:15.576832', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.605615', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.029642315581440926, 'timestamp': '2025-09-10 02:28:15.628876', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.658127', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.012382798828184605, 'timestamp': '2025-09-10 02:28:15.659656', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:15.687953', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.01727687008678913, 'timestamp': '2025-09-10 02:28:15.689390', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.718095', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.010606663301587105, 'timestamp': '2025-09-10 02:28:15.719770', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.747803', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.023922478780150414, 'timestamp': '2025-09-10 02:28:15.770826', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:15.800258', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.024894535541534424, 'timestamp': '2025-09-10 02:28:15.801660', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.830065', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.014281758107244968, 'timestamp': '2025-09-10 02:28:15.831532', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.860395', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.04998927190899849, 'timestamp': '2025-09-10 02:28:15.861963', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.890152', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.008675232529640198, 'timestamp': '2025-09-10 02:28:15.913368', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.941803', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.0068986122496426105, 'timestamp': '2025-09-10 02:28:15.943339', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:15.972064', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.11237634718418121, 'timestamp': '2025-09-10 02:28:15.973595', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.001845', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.03960355743765831, 'timestamp': '2025-09-10 02:28:16.003188', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.031808', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.01569380983710289, 'timestamp': '2025-09-10 02:28:16.054885', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.083415', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.038847409188747406, 'timestamp': '2025-09-10 02:28:16.084946', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.113582', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.037907857447862625, 'timestamp': '2025-09-10 02:28:16.115104', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.144296', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.018548624590039253, 'timestamp': '2025-09-10 02:28:16.146005', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.174502', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.04279320314526558, 'timestamp': '2025-09-10 02:28:16.197629', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.226160', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.022673947736620903, 'timestamp': '2025-09-10 02:28:16.227867', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.256298', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.010001570917665958, 'timestamp': '2025-09-10 02:28:16.257674', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.286825', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.008761915378272533, 'timestamp': '2025-09-10 02:28:16.288342', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.317155', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.027428340166807175, 'timestamp': '2025-09-10 02:28:16.340253', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.369200', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.011549754999577999, 'timestamp': '2025-09-10 02:28:16.370661', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.399156', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.022620776668190956, 'timestamp': '2025-09-10 02:28:16.400643', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.429394', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.052805330604314804, 'timestamp': '2025-09-10 02:28:16.430799', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.459301', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.03661612421274185, 'timestamp': '2025-09-10 02:28:16.482315', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.510440', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.025274012237787247, 'timestamp': '2025-09-10 02:28:16.511841', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.540215', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.017450639978051186, 'timestamp': '2025-09-10 02:28:16.541604', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.569799', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.012563006021082401, 'timestamp': '2025-09-10 02:28:16.571289', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.599678', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.05560094490647316, 'timestamp': '2025-09-10 02:28:16.622968', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.651961', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.04848475009202957, 'timestamp': '2025-09-10 02:28:16.653602', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.681800', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.050533466041088104, 'timestamp': '2025-09-10 02:28:16.683409', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.711967', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.030187392607331276, 'timestamp': '2025-09-10 02:28:16.713405', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:16.741920', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.02421838603913784, 'timestamp': '2025-09-10 02:28:16.765120', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.793666', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.05683031678199768, 'timestamp': '2025-09-10 02:28:16.795290', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.823954', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.06156497076153755, 'timestamp': '2025-09-10 02:28:16.825409', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.853914', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.007874163798987865, 'timestamp': '2025-09-10 02:28:16.855426', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:16.883797', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.029922401532530785, 'timestamp': '2025-09-10 02:28:16.906844', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:16.935189', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.06222435459494591, 'timestamp': '2025-09-10 02:28:16.937827', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.969080', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.01762712560594082, 'timestamp': '2025-09-10 02:28:16.970465', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:16.999106', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.015461350791156292, 'timestamp': '2025-09-10 02:28:17.000537', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.029163', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.01912308856844902, 'timestamp': '2025-09-10 02:28:17.052150', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.080227', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.044046707451343536, 'timestamp': '2025-09-10 02:28:17.081967', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.110555', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.01707787811756134, 'timestamp': '2025-09-10 02:28:17.112246', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.141271', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.013046731241047382, 'timestamp': '2025-09-10 02:28:17.143008', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.171636', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.06424341350793839, 'timestamp': '2025-09-10 02:28:17.194825', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:17.223699', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.014600231312215328, 'timestamp': '2025-09-10 02:28:17.225474', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.254045', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.023384276777505875, 'timestamp': '2025-09-10 02:28:17.255831', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:17.284159', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.021726815029978752, 'timestamp': '2025-09-10 02:28:17.286007', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:17.314656', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.007144995033740997, 'timestamp': '2025-09-10 02:28:17.337890', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.366917', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.026717673987150192, 'timestamp': '2025-09-10 02:28:17.368819', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.397796', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.009127212688326836, 'timestamp': '2025-09-10 02:28:17.399869', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.429062', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.011890118941664696, 'timestamp': '2025-09-10 02:28:17.430938', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.460264', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.016838667914271355, 'timestamp': '2025-09-10 02:28:17.483544', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.514288', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.0234785545617342, 'timestamp': '2025-09-10 02:28:17.515975', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.544427', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.028258562088012695, 'timestamp': '2025-09-10 02:28:17.546008', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.574318', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.03367723897099495, 'timestamp': '2025-09-10 02:28:17.575865', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.604297', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.03372273966670036, 'timestamp': '2025-09-10 02:28:17.627345', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.657042', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.026078801602125168, 'timestamp': '2025-09-10 02:28:17.658893', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.688098', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.008887152187526226, 'timestamp': '2025-09-10 02:28:17.690114', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.718801', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.028703680261969566, 'timestamp': '2025-09-10 02:28:17.720487', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.749353', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.007377589587122202, 'timestamp': '2025-09-10 02:28:17.772773', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.801632', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.049235738813877106, 'timestamp': '2025-09-10 02:28:17.803287', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.832031', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.02330530807375908, 'timestamp': '2025-09-10 02:28:17.833874', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.862187', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.025780126452445984, 'timestamp': '2025-09-10 02:28:17.863777', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.892202', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.03770305588841438, 'timestamp': '2025-09-10 02:28:17.915590', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.944436', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.04107164219021797, 'timestamp': '2025-09-10 02:28:17.946249', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:17.975112', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.03411278501152992, 'timestamp': '2025-09-10 02:28:17.976925', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.005288', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.05005975812673569, 'timestamp': '2025-09-10 02:28:18.007055', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.035926', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.00835686270147562, 'timestamp': '2025-09-10 02:28:18.059239', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.088976', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.02591152861714363, 'timestamp': '2025-09-10 02:28:18.090565', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.119081', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.028668338432908058, 'timestamp': '2025-09-10 02:28:18.120645', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.148982', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.03449665382504463, 'timestamp': '2025-09-10 02:28:18.150744', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.179697', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.03282909840345383, 'timestamp': '2025-09-10 02:28:18.204131', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.233233', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.027261529117822647, 'timestamp': '2025-09-10 02:28:18.234873', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.263332', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.05301208049058914, 'timestamp': '2025-09-10 02:28:18.265351', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.294271', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.0440392792224884, 'timestamp': '2025-09-10 02:28:18.296080', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.324712', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.018889544531702995, 'timestamp': '2025-09-10 02:28:18.348036', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.376948', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.020905017852783203, 'timestamp': '2025-09-10 02:28:18.378676', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:18.407163', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.005557719152420759, 'timestamp': '2025-09-10 02:28:18.409099', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.437819', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.02418561466038227, 'timestamp': '2025-09-10 02:28:18.439585', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:18.468127', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.008221170864999294, 'timestamp': '2025-09-10 02:28:18.491396', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.520676', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.057192351669073105, 'timestamp': '2025-09-10 02:28:18.522478', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.551660', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.04847079887986183, 'timestamp': '2025-09-10 02:28:18.553501', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.582710', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.008429114706814289, 'timestamp': '2025-09-10 02:28:18.584885', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:18.614160', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.050681017339229584, 'timestamp': '2025-09-10 02:28:18.637548', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:18.666711', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.010908703319728374, 'timestamp': '2025-09-10 02:28:18.668414', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.697302', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.06985725462436676, 'timestamp': '2025-09-10 02:28:18.699233', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:18.728114', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.02067830041050911, 'timestamp': '2025-09-10 02:28:18.730038', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:18.758412', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.012844773940742016, 'timestamp': '2025-09-10 02:28:18.781691', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.810600', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.03253468871116638, 'timestamp': '2025-09-10 02:28:18.812226', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.844068', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.03450094908475876, 'timestamp': '2025-09-10 02:28:18.845693', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.874417', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.022504892200231552, 'timestamp': '2025-09-10 02:28:18.875815', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:18.904083', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.0401475764811039, 'timestamp': '2025-09-10 02:28:18.927227', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:18.956319', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.027728352695703506, 'timestamp': '2025-09-10 02:28:18.958162', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:18.986592', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.07560262829065323, 'timestamp': '2025-09-10 02:28:18.988460', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:19.017184', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.040750812739133835, 'timestamp': '2025-09-10 02:28:19.018987', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.047728', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.02107156440615654, 'timestamp': '2025-09-10 02:28:19.071111', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.099408', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.03714641183614731, 'timestamp': '2025-09-10 02:28:19.101161', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.129974', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.056046221405267715, 'timestamp': '2025-09-10 02:28:19.131866', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.160272', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.013576915487647057, 'timestamp': '2025-09-10 02:28:19.161962', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.190806', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.05251288786530495, 'timestamp': '2025-09-10 02:28:19.214233', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.243421', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.03815519064664841, 'timestamp': '2025-09-10 02:28:19.245174', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:19.273712', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.010572902858257294, 'timestamp': '2025-09-10 02:28:19.275260', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:19.304198', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.021908242255449295, 'timestamp': '2025-09-10 02:28:19.305790', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.334197', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.015945082530379295, 'timestamp': '2025-09-10 02:28:19.357196', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.385567', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.03457672521471977, 'timestamp': '2025-09-10 02:28:19.387171', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.415331', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.0339481346309185, 'timestamp': '2025-09-10 02:28:19.416723', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.445017', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.0076408893801271915, 'timestamp': '2025-09-10 02:28:19.446861', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:19.475794', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.033159803599119186, 'timestamp': '2025-09-10 02:28:19.498977', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:28:21.429802', 'step': 2280, 'epoch': 1} {'type': 'pplx', 'content': 2403803.710891873, 'timestamp': '2025-09-10 02:28:21.431776', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.459724', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.03758122771978378, 'timestamp': '2025-09-10 02:28:21.461570', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.497252', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.023119444027543068, 'timestamp': '2025-09-10 02:28:21.499893', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.537046', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.013347284868359566, 'timestamp': '2025-09-10 02:28:21.538981', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.567469', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.04204104468226433, 'timestamp': '2025-09-10 02:28:21.591244', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.625555', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.013740544207394123, 'timestamp': '2025-09-10 02:28:21.627380', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.657659', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.021617084741592407, 'timestamp': '2025-09-10 02:28:21.659424', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.689384', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.03826426342129707, 'timestamp': '2025-09-10 02:28:21.691136', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.719949', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.01925254799425602, 'timestamp': '2025-09-10 02:28:21.745498', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:21.780114', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.025817211717367172, 'timestamp': '2025-09-10 02:28:21.788792', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:21.824458', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.0554530955851078, 'timestamp': '2025-09-10 02:28:21.826352', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.856428', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.029752230271697044, 'timestamp': '2025-09-10 02:28:21.858270', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:21.888547', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.01790672540664673, 'timestamp': '2025-09-10 02:28:21.911910', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.941686', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.003182124812155962, 'timestamp': '2025-09-10 02:28:21.943268', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:21.972996', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.035757869482040405, 'timestamp': '2025-09-10 02:28:21.974715', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.008895', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.04373505339026451, 'timestamp': '2025-09-10 02:28:22.014160', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.057135', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.02016693353652954, 'timestamp': '2025-09-10 02:28:22.080508', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.109234', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.025444846600294113, 'timestamp': '2025-09-10 02:28:22.116233', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.148342', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.018856560811400414, 'timestamp': '2025-09-10 02:28:22.150694', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:22.179919', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.017691995948553085, 'timestamp': '2025-09-10 02:28:22.181559', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.213359', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.02185535989701748, 'timestamp': '2025-09-10 02:28:22.237290', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.266702', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.027811145409941673, 'timestamp': '2025-09-10 02:28:22.268282', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.297528', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.010415504686534405, 'timestamp': '2025-09-10 02:28:22.299360', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.328430', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.044645339250564575, 'timestamp': '2025-09-10 02:28:22.334628', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.368138', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.044401075690984726, 'timestamp': '2025-09-10 02:28:22.391698', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.420430', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.03624344989657402, 'timestamp': '2025-09-10 02:28:22.424397', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.455969', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.008225697092711926, 'timestamp': '2025-09-10 02:28:22.459334', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.489543', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.005880584474653006, 'timestamp': '2025-09-10 02:28:22.491068', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.520533', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.017149990424513817, 'timestamp': '2025-09-10 02:28:22.543728', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.572790', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.022219756618142128, 'timestamp': '2025-09-10 02:28:22.575380', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.604584', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.04094681143760681, 'timestamp': '2025-09-10 02:28:22.608412', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.638363', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.04218247905373573, 'timestamp': '2025-09-10 02:28:22.640481', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.669377', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.015587793663144112, 'timestamp': '2025-09-10 02:28:22.692837', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:22.722106', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.03662145137786865, 'timestamp': '2025-09-10 02:28:22.724084', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.752690', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.04511374607682228, 'timestamp': '2025-09-10 02:28:22.754660', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.784034', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.0072469934821128845, 'timestamp': '2025-09-10 02:28:22.785842', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:22.815703', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.06467636674642563, 'timestamp': '2025-09-10 02:28:22.843148', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.872120', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.02188717946410179, 'timestamp': '2025-09-10 02:28:22.874010', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.906706', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.002025757683441043, 'timestamp': '2025-09-10 02:28:22.908478', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:22.937670', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.030509835109114647, 'timestamp': '2025-09-10 02:28:22.939480', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:22.968050', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.032555047422647476, 'timestamp': '2025-09-10 02:28:22.991850', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.020504', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.007047518156468868, 'timestamp': '2025-09-10 02:28:23.022297', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.051265', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.02716813050210476, 'timestamp': '2025-09-10 02:28:23.053334', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.084766', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.010094745084643364, 'timestamp': '2025-09-10 02:28:23.086305', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.115075', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.0395091250538826, 'timestamp': '2025-09-10 02:28:23.140194', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.173715', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.008416331373155117, 'timestamp': '2025-09-10 02:28:23.175484', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.205189', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.05678853020071983, 'timestamp': '2025-09-10 02:28:23.218279', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.252996', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.021840985864400864, 'timestamp': '2025-09-10 02:28:23.254588', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.282882', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.012246578000485897, 'timestamp': '2025-09-10 02:28:23.305871', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.335494', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.0025970342103391886, 'timestamp': '2025-09-10 02:28:23.337085', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.365612', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.014994210563600063, 'timestamp': '2025-09-10 02:28:23.367058', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.396160', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.04930217191576958, 'timestamp': '2025-09-10 02:28:23.398098', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:23.430981', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.03074464574456215, 'timestamp': '2025-09-10 02:28:23.454377', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.483111', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.007758311927318573, 'timestamp': '2025-09-10 02:28:23.484706', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.512996', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.04545729607343674, 'timestamp': '2025-09-10 02:28:23.515515', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.546808', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.02094576694071293, 'timestamp': '2025-09-10 02:28:23.548530', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.576901', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.06826934218406677, 'timestamp': '2025-09-10 02:28:23.602122', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.631333', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.03085923008620739, 'timestamp': '2025-09-10 02:28:23.633145', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.666827', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.030749622732400894, 'timestamp': '2025-09-10 02:28:23.668335', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.696322', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.01941312849521637, 'timestamp': '2025-09-10 02:28:23.698081', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.726707', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.017771733924746513, 'timestamp': '2025-09-10 02:28:23.750074', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.779101', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.013400768861174583, 'timestamp': '2025-09-10 02:28:23.780807', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.809856', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.0965103730559349, 'timestamp': '2025-09-10 02:28:23.811538', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.839893', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.05872729793190956, 'timestamp': '2025-09-10 02:28:23.841605', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.870117', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.016339726746082306, 'timestamp': '2025-09-10 02:28:23.893358', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:23.922207', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.03597145155072212, 'timestamp': '2025-09-10 02:28:23.924219', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:23.953054', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.01645306684076786, 'timestamp': '2025-09-10 02:28:23.954778', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:23.984742', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.01860763132572174, 'timestamp': '2025-09-10 02:28:23.986848', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.022252', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.05072319507598877, 'timestamp': '2025-09-10 02:28:24.045706', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:24.081360', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.05628179386258125, 'timestamp': '2025-09-10 02:28:24.083012', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.111781', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.04576278105378151, 'timestamp': '2025-09-10 02:28:24.113968', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.149164', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.007431622128933668, 'timestamp': '2025-09-10 02:28:24.151385', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.184950', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.006663801614195108, 'timestamp': '2025-09-10 02:28:24.209918', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.245976', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.001480492064729333, 'timestamp': '2025-09-10 02:28:24.247928', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:24.277522', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.07069874554872513, 'timestamp': '2025-09-10 02:28:24.284399', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.313133', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.012294160202145576, 'timestamp': '2025-09-10 02:28:24.316011', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.346965', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.08251728117465973, 'timestamp': '2025-09-10 02:28:24.370456', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.399111', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.016314489766955376, 'timestamp': '2025-09-10 02:28:24.406264', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:24.438698', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.01033691130578518, 'timestamp': '2025-09-10 02:28:24.440832', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.470377', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.03003569133579731, 'timestamp': '2025-09-10 02:28:24.472055', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.500949', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.04151889681816101, 'timestamp': '2025-09-10 02:28:24.524180', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.557120', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.09053687751293182, 'timestamp': '2025-09-10 02:28:24.560219', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.591845', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.03297692909836769, 'timestamp': '2025-09-10 02:28:24.593591', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.622366', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.03841349110007286, 'timestamp': '2025-09-10 02:28:24.624428', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.663507', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.027684299275279045, 'timestamp': '2025-09-10 02:28:24.686869', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.719249', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.029074719175696373, 'timestamp': '2025-09-10 02:28:24.721098', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.760550', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.029994269832968712, 'timestamp': '2025-09-10 02:28:24.766371', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.795107', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.020649809390306473, 'timestamp': '2025-09-10 02:28:24.799632', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.828848', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.01844174601137638, 'timestamp': '2025-09-10 02:28:24.854376', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.883544', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.05636049434542656, 'timestamp': '2025-09-10 02:28:24.885418', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.914109', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.020002515986561775, 'timestamp': '2025-09-10 02:28:24.915931', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.944417', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.04524189978837967, 'timestamp': '2025-09-10 02:28:24.946226', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:24.975227', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.010690070688724518, 'timestamp': '2025-09-10 02:28:24.998592', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.027703', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.0136357257142663, 'timestamp': '2025-09-10 02:28:25.029781', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.058558', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.052628517150878906, 'timestamp': '2025-09-10 02:28:25.061751', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.090202', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.009049796499311924, 'timestamp': '2025-09-10 02:28:25.095938', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.126599', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.029583189636468887, 'timestamp': '2025-09-10 02:28:25.150071', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:25.183891', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.03513399139046669, 'timestamp': '2025-09-10 02:28:25.185911', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.216581', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.03157564997673035, 'timestamp': '2025-09-10 02:28:25.218536', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.252019', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.02360604517161846, 'timestamp': '2025-09-10 02:28:25.253911', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.289690', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.018449855968356133, 'timestamp': '2025-09-10 02:28:25.313095', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.342042', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.04448559135198593, 'timestamp': '2025-09-10 02:28:25.351552', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.380942', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.04531589522957802, 'timestamp': '2025-09-10 02:28:25.389388', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.419050', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.02107251062989235, 'timestamp': '2025-09-10 02:28:25.421129', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.449984', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.033820513635873795, 'timestamp': '2025-09-10 02:28:25.473501', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.502339', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.032583147287368774, 'timestamp': '2025-09-10 02:28:25.505527', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.541452', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.0029801710043102503, 'timestamp': '2025-09-10 02:28:25.546861', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.579754', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.02128872647881508, 'timestamp': '2025-09-10 02:28:25.581502', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.610420', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.04043685272336006, 'timestamp': '2025-09-10 02:28:25.634014', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.663181', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.015649553388357162, 'timestamp': '2025-09-10 02:28:25.665081', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.693808', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.010012378916144371, 'timestamp': '2025-09-10 02:28:25.695714', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:25.724165', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.05852745845913887, 'timestamp': '2025-09-10 02:28:25.726921', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.755566', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.034893278032541275, 'timestamp': '2025-09-10 02:28:25.779627', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.810632', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.016062278300523758, 'timestamp': '2025-09-10 02:28:25.814072', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.843631', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.05764563009142876, 'timestamp': '2025-09-10 02:28:25.845531', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.873863', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.01467811968177557, 'timestamp': '2025-09-10 02:28:25.875926', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.904767', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.06349930167198181, 'timestamp': '2025-09-10 02:28:25.928149', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:25.962545', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.029672754928469658, 'timestamp': '2025-09-10 02:28:25.964580', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:25.994663', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.039500582963228226, 'timestamp': '2025-09-10 02:28:26.007527', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.042457', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.02818789891898632, 'timestamp': '2025-09-10 02:28:26.049377', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:26.079190', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.028065938502550125, 'timestamp': '2025-09-10 02:28:26.107328', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.136251', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.021312285214662552, 'timestamp': '2025-09-10 02:28:26.138140', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.166999', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.0122200483456254, 'timestamp': '2025-09-10 02:28:26.178257', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.221337', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.02185259386897087, 'timestamp': '2025-09-10 02:28:26.223343', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.252427', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.031530894339084625, 'timestamp': '2025-09-10 02:28:26.275946', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:26.305987', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.026655616238713264, 'timestamp': '2025-09-10 02:28:26.307787', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.336449', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.019431522116065025, 'timestamp': '2025-09-10 02:28:26.338237', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.367638', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.016600485891103745, 'timestamp': '2025-09-10 02:28:26.373672', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.403809', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.03890708088874817, 'timestamp': '2025-09-10 02:28:26.427366', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.456694', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.03087475337088108, 'timestamp': '2025-09-10 02:28:26.460779', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.491110', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.05119401216506958, 'timestamp': '2025-09-10 02:28:26.492827', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.521324', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.026257485151290894, 'timestamp': '2025-09-10 02:28:26.523896', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.552429', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.04165105149149895, 'timestamp': '2025-09-10 02:28:26.575994', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.605181', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.018239473924040794, 'timestamp': '2025-09-10 02:28:26.607228', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.635981', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.06883590668439865, 'timestamp': '2025-09-10 02:28:26.640361', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.669169', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.006214165594428778, 'timestamp': '2025-09-10 02:28:26.672815', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.703564', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.013146909885108471, 'timestamp': '2025-09-10 02:28:26.727396', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.756980', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.004848008044064045, 'timestamp': '2025-09-10 02:28:26.758940', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.787498', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.017544735223054886, 'timestamp': '2025-09-10 02:28:26.791740', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.826304', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.03728755936026573, 'timestamp': '2025-09-10 02:28:26.831474', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.869022', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.05490528419613838, 'timestamp': '2025-09-10 02:28:26.894754', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.923482', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.025458218529820442, 'timestamp': '2025-09-10 02:28:26.927694', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.959886', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.024190718308091164, 'timestamp': '2025-09-10 02:28:26.961622', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:26.992026', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.02364364080131054, 'timestamp': '2025-09-10 02:28:26.993781', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.022244', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.013525965623557568, 'timestamp': '2025-09-10 02:28:27.045516', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.074357', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.048916157335042953, 'timestamp': '2025-09-10 02:28:27.076086', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.105435', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.018245337530970573, 'timestamp': '2025-09-10 02:28:27.107376', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.136046', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.03028123266994953, 'timestamp': '2025-09-10 02:28:27.138698', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.171574', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.04529779404401779, 'timestamp': '2025-09-10 02:28:27.194833', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.227138', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.04372987896203995, 'timestamp': '2025-09-10 02:28:27.228936', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.257997', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.04191436618566513, 'timestamp': '2025-09-10 02:28:27.260828', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.289611', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.05567625164985657, 'timestamp': '2025-09-10 02:28:27.291373', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:27.320078', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.033835552632808685, 'timestamp': '2025-09-10 02:28:27.343919', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:28:29.386312', 'step': 2432, 'epoch': 1} {'type': 'pplx', 'content': 2158993.796437208, 'timestamp': '2025-09-10 02:28:29.388121', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.417023', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.016373800113797188, 'timestamp': '2025-09-10 02:28:29.418840', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.448004', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.021115172654390335, 'timestamp': '2025-09-10 02:28:29.450131', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.479141', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.02003416419029236, 'timestamp': '2025-09-10 02:28:29.485171', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 80], 'flops': 593517404912}, 'timestamp': '2025-09-10 02:28:29.587739', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.04149497672915459, 'timestamp': '2025-09-10 02:28:29.612980', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.642893', 'step': 2436, 'epoch': 2} {'type': 'loss', 'content': 0.030175983905792236, 'timestamp': '2025-09-10 02:28:29.649266', 'step': 2437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.680927', 'step': 2437, 'epoch': 2} {'type': 'loss', 'content': 0.02488701418042183, 'timestamp': '2025-09-10 02:28:29.683191', 'step': 2438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.717322', 'step': 2438, 'epoch': 2} {'type': 'loss', 'content': 0.01973804645240307, 'timestamp': '2025-09-10 02:28:29.722599', 'step': 2439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.751533', 'step': 2439, 'epoch': 2} {'type': 'loss', 'content': 0.05977180227637291, 'timestamp': '2025-09-10 02:28:29.774911', 'step': 2440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.807294', 'step': 2440, 'epoch': 2} {'type': 'loss', 'content': 0.033726807683706284, 'timestamp': '2025-09-10 02:28:29.809192', 'step': 2441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.843227', 'step': 2441, 'epoch': 2} {'type': 'loss', 'content': 0.004302346147596836, 'timestamp': '2025-09-10 02:28:29.845166', 'step': 2442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.874085', 'step': 2442, 'epoch': 2} {'type': 'loss', 'content': 0.0188862644135952, 'timestamp': '2025-09-10 02:28:29.878979', 'step': 2443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:29.908311', 'step': 2443, 'epoch': 2} {'type': 'loss', 'content': 0.02564084902405739, 'timestamp': '2025-09-10 02:28:29.931744', 'step': 2444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:29.974981', 'step': 2444, 'epoch': 2} {'type': 'loss', 'content': 0.028563078492879868, 'timestamp': '2025-09-10 02:28:29.983528', 'step': 2445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:30.013267', 'step': 2445, 'epoch': 2} {'type': 'loss', 'content': 0.020576000213623047, 'timestamp': '2025-09-10 02:28:30.015658', 'step': 2446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.045698', 'step': 2446, 'epoch': 2} {'type': 'loss', 'content': 0.037143293768167496, 'timestamp': '2025-09-10 02:28:30.047495', 'step': 2447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.078790', 'step': 2447, 'epoch': 2} {'type': 'loss', 'content': 0.02041972242295742, 'timestamp': '2025-09-10 02:28:30.102354', 'step': 2448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:30.144929', 'step': 2448, 'epoch': 2} {'type': 'loss', 'content': 0.03853491321206093, 'timestamp': '2025-09-10 02:28:30.149989', 'step': 2449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.181165', 'step': 2449, 'epoch': 2} {'type': 'loss', 'content': 0.02254076674580574, 'timestamp': '2025-09-10 02:28:30.185583', 'step': 2450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.218234', 'step': 2450, 'epoch': 2} {'type': 'loss', 'content': 0.016871871426701546, 'timestamp': '2025-09-10 02:28:30.230115', 'step': 2451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.263766', 'step': 2451, 'epoch': 2} {'type': 'loss', 'content': 0.05091606453061104, 'timestamp': '2025-09-10 02:28:30.287121', 'step': 2452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.316190', 'step': 2452, 'epoch': 2} {'type': 'loss', 'content': 0.016244972124695778, 'timestamp': '2025-09-10 02:28:30.318160', 'step': 2453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:30.347622', 'step': 2453, 'epoch': 2} {'type': 'loss', 'content': 0.008798770606517792, 'timestamp': '2025-09-10 02:28:30.349421', 'step': 2454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.379325', 'step': 2454, 'epoch': 2} {'type': 'loss', 'content': 0.026254108175635338, 'timestamp': '2025-09-10 02:28:30.381386', 'step': 2455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.413252', 'step': 2455, 'epoch': 2} {'type': 'loss', 'content': 0.01920224539935589, 'timestamp': '2025-09-10 02:28:30.436701', 'step': 2456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:30.470102', 'step': 2456, 'epoch': 2} {'type': 'loss', 'content': 0.013258195482194424, 'timestamp': '2025-09-10 02:28:30.472753', 'step': 2457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.501925', 'step': 2457, 'epoch': 2} {'type': 'loss', 'content': 0.017449038103222847, 'timestamp': '2025-09-10 02:28:30.504111', 'step': 2458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.532870', 'step': 2458, 'epoch': 2} {'type': 'loss', 'content': 0.032133761793375015, 'timestamp': '2025-09-10 02:28:30.534766', 'step': 2459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.563625', 'step': 2459, 'epoch': 2} {'type': 'loss', 'content': 0.01655067689716816, 'timestamp': '2025-09-10 02:28:30.587113', 'step': 2460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.616780', 'step': 2460, 'epoch': 2} {'type': 'loss', 'content': 0.013599888421595097, 'timestamp': '2025-09-10 02:28:30.618367', 'step': 2461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.647589', 'step': 2461, 'epoch': 2} {'type': 'loss', 'content': 0.017768505960702896, 'timestamp': '2025-09-10 02:28:30.649582', 'step': 2462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:30.678235', 'step': 2462, 'epoch': 2} {'type': 'loss', 'content': 0.004869905766099691, 'timestamp': '2025-09-10 02:28:30.680075', 'step': 2463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.708636', 'step': 2463, 'epoch': 2} {'type': 'loss', 'content': 0.018566574901342392, 'timestamp': '2025-09-10 02:28:30.732604', 'step': 2464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.763003', 'step': 2464, 'epoch': 2} {'type': 'loss', 'content': 0.0010703759035095572, 'timestamp': '2025-09-10 02:28:30.765249', 'step': 2465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.794831', 'step': 2465, 'epoch': 2} {'type': 'loss', 'content': 0.015269110910594463, 'timestamp': '2025-09-10 02:28:30.799862', 'step': 2466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.835106', 'step': 2466, 'epoch': 2} {'type': 'loss', 'content': 0.02452816627919674, 'timestamp': '2025-09-10 02:28:30.836868', 'step': 2467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.866402', 'step': 2467, 'epoch': 2} {'type': 'loss', 'content': 0.02096046321094036, 'timestamp': '2025-09-10 02:28:30.890075', 'step': 2468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.923340', 'step': 2468, 'epoch': 2} {'type': 'loss', 'content': 0.037257127463817596, 'timestamp': '2025-09-10 02:28:30.929124', 'step': 2469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:30.966046', 'step': 2469, 'epoch': 2} {'type': 'loss', 'content': 0.028221143409609795, 'timestamp': '2025-09-10 02:28:30.967798', 'step': 2470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:30.998221', 'step': 2470, 'epoch': 2} {'type': 'loss', 'content': 0.01037190668284893, 'timestamp': '2025-09-10 02:28:31.000282', 'step': 2471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.031593', 'step': 2471, 'epoch': 2} {'type': 'loss', 'content': 0.00449879327788949, 'timestamp': '2025-09-10 02:28:31.060190', 'step': 2472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.089742', 'step': 2472, 'epoch': 2} {'type': 'loss', 'content': 0.008812216110527515, 'timestamp': '2025-09-10 02:28:31.091787', 'step': 2473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.124476', 'step': 2473, 'epoch': 2} {'type': 'loss', 'content': 0.00666913902387023, 'timestamp': '2025-09-10 02:28:31.129592', 'step': 2474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.158828', 'step': 2474, 'epoch': 2} {'type': 'loss', 'content': 0.008816850371658802, 'timestamp': '2025-09-10 02:28:31.161132', 'step': 2475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.190080', 'step': 2475, 'epoch': 2} {'type': 'loss', 'content': 0.01382219884544611, 'timestamp': '2025-09-10 02:28:31.220662', 'step': 2476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.253306', 'step': 2476, 'epoch': 2} {'type': 'loss', 'content': 0.028483625501394272, 'timestamp': '2025-09-10 02:28:31.255379', 'step': 2477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:31.284350', 'step': 2477, 'epoch': 2} {'type': 'loss', 'content': 0.0031778302509337664, 'timestamp': '2025-09-10 02:28:31.286321', 'step': 2478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.316018', 'step': 2478, 'epoch': 2} {'type': 'loss', 'content': 0.005270532798022032, 'timestamp': '2025-09-10 02:28:31.321055', 'step': 2479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.350572', 'step': 2479, 'epoch': 2} {'type': 'loss', 'content': 0.046177320182323456, 'timestamp': '2025-09-10 02:28:31.374143', 'step': 2480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.411092', 'step': 2480, 'epoch': 2} {'type': 'loss', 'content': 0.004339002072811127, 'timestamp': '2025-09-10 02:28:31.417184', 'step': 2481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.458815', 'step': 2481, 'epoch': 2} {'type': 'loss', 'content': 0.014661594294011593, 'timestamp': '2025-09-10 02:28:31.460704', 'step': 2482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.496032', 'step': 2482, 'epoch': 2} {'type': 'loss', 'content': 0.0027640238404273987, 'timestamp': '2025-09-10 02:28:31.497802', 'step': 2483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.526533', 'step': 2483, 'epoch': 2} {'type': 'loss', 'content': 0.0363384447991848, 'timestamp': '2025-09-10 02:28:31.551783', 'step': 2484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.584971', 'step': 2484, 'epoch': 2} {'type': 'loss', 'content': 0.05729368329048157, 'timestamp': '2025-09-10 02:28:31.586902', 'step': 2485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.615481', 'step': 2485, 'epoch': 2} {'type': 'loss', 'content': 0.017933333292603493, 'timestamp': '2025-09-10 02:28:31.617459', 'step': 2486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:31.646426', 'step': 2486, 'epoch': 2} {'type': 'loss', 'content': 0.04761345311999321, 'timestamp': '2025-09-10 02:28:31.648638', 'step': 2487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:31.677304', 'step': 2487, 'epoch': 2} {'type': 'loss', 'content': 0.021747812628746033, 'timestamp': '2025-09-10 02:28:31.700856', 'step': 2488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.729450', 'step': 2488, 'epoch': 2} {'type': 'loss', 'content': 0.09620558470487595, 'timestamp': '2025-09-10 02:28:31.731191', 'step': 2489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.760121', 'step': 2489, 'epoch': 2} {'type': 'loss', 'content': 0.0060876780189573765, 'timestamp': '2025-09-10 02:28:31.767275', 'step': 2490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.802050', 'step': 2490, 'epoch': 2} {'type': 'loss', 'content': 0.007496859412640333, 'timestamp': '2025-09-10 02:28:31.803835', 'step': 2491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:31.832695', 'step': 2491, 'epoch': 2} {'type': 'loss', 'content': 0.038308579474687576, 'timestamp': '2025-09-10 02:28:31.855914', 'step': 2492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.884613', 'step': 2492, 'epoch': 2} {'type': 'loss', 'content': 0.011420799419283867, 'timestamp': '2025-09-10 02:28:31.886518', 'step': 2493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.920924', 'step': 2493, 'epoch': 2} {'type': 'loss', 'content': 0.027831334620714188, 'timestamp': '2025-09-10 02:28:31.923020', 'step': 2494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:31.956075', 'step': 2494, 'epoch': 2} {'type': 'loss', 'content': 0.0011441315291449428, 'timestamp': '2025-09-10 02:28:31.961311', 'step': 2495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:31.997411', 'step': 2495, 'epoch': 2} {'type': 'loss', 'content': 0.06489991396665573, 'timestamp': '2025-09-10 02:28:32.020942', 'step': 2496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.050581', 'step': 2496, 'epoch': 2} {'type': 'loss', 'content': 0.0068673426285386086, 'timestamp': '2025-09-10 02:28:32.052444', 'step': 2497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:32.081635', 'step': 2497, 'epoch': 2} {'type': 'loss', 'content': 0.01522838231176138, 'timestamp': '2025-09-10 02:28:32.083515', 'step': 2498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.118377', 'step': 2498, 'epoch': 2} {'type': 'loss', 'content': 0.04047464206814766, 'timestamp': '2025-09-10 02:28:32.121851', 'step': 2499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:32.152023', 'step': 2499, 'epoch': 2} {'type': 'loss', 'content': 0.028804613277316093, 'timestamp': '2025-09-10 02:28:32.175625', 'step': 2500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-10 02:28:36.661355', 'step': 2500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.699811', 'step': 2500, 'epoch': 2} {'type': 'loss', 'content': 0.04566410928964615, 'timestamp': '2025-09-10 02:28:36.701594', 'step': 2501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.730919', 'step': 2501, 'epoch': 2} {'type': 'loss', 'content': 0.0038940771482884884, 'timestamp': '2025-09-10 02:28:36.733940', 'step': 2502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.764923', 'step': 2502, 'epoch': 2} {'type': 'loss', 'content': 0.005365054588764906, 'timestamp': '2025-09-10 02:28:36.767684', 'step': 2503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.796559', 'step': 2503, 'epoch': 2} {'type': 'loss', 'content': 0.008775746449828148, 'timestamp': '2025-09-10 02:28:36.820387', 'step': 2504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:36.851528', 'step': 2504, 'epoch': 2} {'type': 'loss', 'content': 0.06777717918157578, 'timestamp': '2025-09-10 02:28:36.853341', 'step': 2505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.882745', 'step': 2505, 'epoch': 2} {'type': 'loss', 'content': 0.016283374279737473, 'timestamp': '2025-09-10 02:28:36.884678', 'step': 2506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.913400', 'step': 2506, 'epoch': 2} {'type': 'loss', 'content': 0.025961117818951607, 'timestamp': '2025-09-10 02:28:36.915247', 'step': 2507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:36.944639', 'step': 2507, 'epoch': 2} {'type': 'loss', 'content': 0.015359207056462765, 'timestamp': '2025-09-10 02:28:36.968379', 'step': 2508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:36.997144', 'step': 2508, 'epoch': 2} {'type': 'loss', 'content': 0.024218466132879257, 'timestamp': '2025-09-10 02:28:36.998853', 'step': 2509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.027321', 'step': 2509, 'epoch': 2} {'type': 'loss', 'content': 0.023411747068166733, 'timestamp': '2025-09-10 02:28:37.029001', 'step': 2510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.057988', 'step': 2510, 'epoch': 2} {'type': 'loss', 'content': 0.060719482600688934, 'timestamp': '2025-09-10 02:28:37.059829', 'step': 2511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.088745', 'step': 2511, 'epoch': 2} {'type': 'loss', 'content': 0.008819801732897758, 'timestamp': '2025-09-10 02:28:37.112095', 'step': 2512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.140864', 'step': 2512, 'epoch': 2} {'type': 'loss', 'content': 0.03373945131897926, 'timestamp': '2025-09-10 02:28:37.142889', 'step': 2513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.171714', 'step': 2513, 'epoch': 2} {'type': 'loss', 'content': 0.03358836844563484, 'timestamp': '2025-09-10 02:28:37.173547', 'step': 2514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:37.202581', 'step': 2514, 'epoch': 2} {'type': 'loss', 'content': 0.03507380932569504, 'timestamp': '2025-09-10 02:28:37.204262', 'step': 2515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.232727', 'step': 2515, 'epoch': 2} {'type': 'loss', 'content': 0.006715219002217054, 'timestamp': '2025-09-10 02:28:37.256120', 'step': 2516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:37.284867', 'step': 2516, 'epoch': 2} {'type': 'loss', 'content': 0.009967942722141743, 'timestamp': '2025-09-10 02:28:37.286783', 'step': 2517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.315528', 'step': 2517, 'epoch': 2} {'type': 'loss', 'content': 0.05242612957954407, 'timestamp': '2025-09-10 02:28:37.317284', 'step': 2518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.346105', 'step': 2518, 'epoch': 2} {'type': 'loss', 'content': 0.01925036497414112, 'timestamp': '2025-09-10 02:28:37.347928', 'step': 2519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.376272', 'step': 2519, 'epoch': 2} {'type': 'loss', 'content': 0.04645388573408127, 'timestamp': '2025-09-10 02:28:37.399605', 'step': 2520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.428407', 'step': 2520, 'epoch': 2} {'type': 'loss', 'content': 0.018919702619314194, 'timestamp': '2025-09-10 02:28:37.430141', 'step': 2521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.458550', 'step': 2521, 'epoch': 2} {'type': 'loss', 'content': 0.038673412054777145, 'timestamp': '2025-09-10 02:28:37.460352', 'step': 2522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.489229', 'step': 2522, 'epoch': 2} {'type': 'loss', 'content': 0.028117135167121887, 'timestamp': '2025-09-10 02:28:37.491089', 'step': 2523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.519640', 'step': 2523, 'epoch': 2} {'type': 'loss', 'content': 0.01898709125816822, 'timestamp': '2025-09-10 02:28:37.542858', 'step': 2524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.571692', 'step': 2524, 'epoch': 2} {'type': 'loss', 'content': 0.007139905821532011, 'timestamp': '2025-09-10 02:28:37.573295', 'step': 2525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:37.602185', 'step': 2525, 'epoch': 2} {'type': 'loss', 'content': 0.021932478994131088, 'timestamp': '2025-09-10 02:28:37.604436', 'step': 2526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:37.632945', 'step': 2526, 'epoch': 2} {'type': 'loss', 'content': 0.030555009841918945, 'timestamp': '2025-09-10 02:28:37.634568', 'step': 2527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.663346', 'step': 2527, 'epoch': 2} {'type': 'loss', 'content': 0.022133933380246162, 'timestamp': '2025-09-10 02:28:37.686654', 'step': 2528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.715622', 'step': 2528, 'epoch': 2} {'type': 'loss', 'content': 0.020784815773367882, 'timestamp': '2025-09-10 02:28:37.717292', 'step': 2529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.747072', 'step': 2529, 'epoch': 2} {'type': 'loss', 'content': 0.018027016893029213, 'timestamp': '2025-09-10 02:28:37.748787', 'step': 2530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.777372', 'step': 2530, 'epoch': 2} {'type': 'loss', 'content': 0.04622536525130272, 'timestamp': '2025-09-10 02:28:37.780375', 'step': 2531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.810166', 'step': 2531, 'epoch': 2} {'type': 'loss', 'content': 0.01820000819861889, 'timestamp': '2025-09-10 02:28:37.833434', 'step': 2532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:37.862635', 'step': 2532, 'epoch': 2} {'type': 'loss', 'content': 0.027822909876704216, 'timestamp': '2025-09-10 02:28:37.864286', 'step': 2533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.893185', 'step': 2533, 'epoch': 2} {'type': 'loss', 'content': 0.0055721839889883995, 'timestamp': '2025-09-10 02:28:37.895084', 'step': 2534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:37.923962', 'step': 2534, 'epoch': 2} {'type': 'loss', 'content': 0.020961089059710503, 'timestamp': '2025-09-10 02:28:37.925672', 'step': 2535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:37.954699', 'step': 2535, 'epoch': 2} {'type': 'loss', 'content': 0.015716660767793655, 'timestamp': '2025-09-10 02:28:37.978075', 'step': 2536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.006857', 'step': 2536, 'epoch': 2} {'type': 'loss', 'content': 0.02635492943227291, 'timestamp': '2025-09-10 02:28:38.008896', 'step': 2537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.037279', 'step': 2537, 'epoch': 2} {'type': 'loss', 'content': 0.019367411732673645, 'timestamp': '2025-09-10 02:28:38.038968', 'step': 2538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:38.067699', 'step': 2538, 'epoch': 2} {'type': 'loss', 'content': 0.03078184649348259, 'timestamp': '2025-09-10 02:28:38.069771', 'step': 2539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.098866', 'step': 2539, 'epoch': 2} {'type': 'loss', 'content': 0.006509651895612478, 'timestamp': '2025-09-10 02:28:38.122395', 'step': 2540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.151574', 'step': 2540, 'epoch': 2} {'type': 'loss', 'content': 0.006312340032309294, 'timestamp': '2025-09-10 02:28:38.153223', 'step': 2541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.183136', 'step': 2541, 'epoch': 2} {'type': 'loss', 'content': 0.007816670462489128, 'timestamp': '2025-09-10 02:28:38.185007', 'step': 2542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.213801', 'step': 2542, 'epoch': 2} {'type': 'loss', 'content': 0.040647126734256744, 'timestamp': '2025-09-10 02:28:38.215655', 'step': 2543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.244169', 'step': 2543, 'epoch': 2} {'type': 'loss', 'content': 0.020041000097990036, 'timestamp': '2025-09-10 02:28:38.267372', 'step': 2544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.295950', 'step': 2544, 'epoch': 2} {'type': 'loss', 'content': 0.006902061402797699, 'timestamp': '2025-09-10 02:28:38.297706', 'step': 2545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.325988', 'step': 2545, 'epoch': 2} {'type': 'loss', 'content': 0.016499994322657585, 'timestamp': '2025-09-10 02:28:38.327784', 'step': 2546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.356411', 'step': 2546, 'epoch': 2} {'type': 'loss', 'content': 0.050838652998209, 'timestamp': '2025-09-10 02:28:38.358254', 'step': 2547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.386601', 'step': 2547, 'epoch': 2} {'type': 'loss', 'content': 0.013835342600941658, 'timestamp': '2025-09-10 02:28:38.409806', 'step': 2548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.438283', 'step': 2548, 'epoch': 2} {'type': 'loss', 'content': 0.0341733880341053, 'timestamp': '2025-09-10 02:28:38.440247', 'step': 2549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.469709', 'step': 2549, 'epoch': 2} {'type': 'loss', 'content': 0.00761485705152154, 'timestamp': '2025-09-10 02:28:38.471456', 'step': 2550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.500935', 'step': 2550, 'epoch': 2} {'type': 'loss', 'content': 0.04974096640944481, 'timestamp': '2025-09-10 02:28:38.502957', 'step': 2551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.532220', 'step': 2551, 'epoch': 2} {'type': 'loss', 'content': 0.013284144923090935, 'timestamp': '2025-09-10 02:28:38.555584', 'step': 2552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.585107', 'step': 2552, 'epoch': 2} {'type': 'loss', 'content': 0.04868572577834129, 'timestamp': '2025-09-10 02:28:38.586846', 'step': 2553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.616993', 'step': 2553, 'epoch': 2} {'type': 'loss', 'content': 0.006279000546783209, 'timestamp': '2025-09-10 02:28:38.618720', 'step': 2554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.647613', 'step': 2554, 'epoch': 2} {'type': 'loss', 'content': 0.009842381812632084, 'timestamp': '2025-09-10 02:28:38.649217', 'step': 2555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:38.677294', 'step': 2555, 'epoch': 2} {'type': 'loss', 'content': 0.007323114667087793, 'timestamp': '2025-09-10 02:28:38.700624', 'step': 2556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.730470', 'step': 2556, 'epoch': 2} {'type': 'loss', 'content': 0.0027403663843870163, 'timestamp': '2025-09-10 02:28:38.732345', 'step': 2557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.761195', 'step': 2557, 'epoch': 2} {'type': 'loss', 'content': 0.007774870377033949, 'timestamp': '2025-09-10 02:28:38.763080', 'step': 2558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.791931', 'step': 2558, 'epoch': 2} {'type': 'loss', 'content': 0.02943340130150318, 'timestamp': '2025-09-10 02:28:38.793972', 'step': 2559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.822712', 'step': 2559, 'epoch': 2} {'type': 'loss', 'content': 0.008991079404950142, 'timestamp': '2025-09-10 02:28:38.846010', 'step': 2560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.875309', 'step': 2560, 'epoch': 2} {'type': 'loss', 'content': 0.06065422296524048, 'timestamp': '2025-09-10 02:28:38.877038', 'step': 2561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.906036', 'step': 2561, 'epoch': 2} {'type': 'loss', 'content': 0.003688907716423273, 'timestamp': '2025-09-10 02:28:38.907890', 'step': 2562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.936639', 'step': 2562, 'epoch': 2} {'type': 'loss', 'content': 0.034662097692489624, 'timestamp': '2025-09-10 02:28:38.938346', 'step': 2563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:38.966914', 'step': 2563, 'epoch': 2} {'type': 'loss', 'content': 0.015825720503926277, 'timestamp': '2025-09-10 02:28:38.990013', 'step': 2564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.018690', 'step': 2564, 'epoch': 2} {'type': 'loss', 'content': 0.00597797054797411, 'timestamp': '2025-09-10 02:28:39.020777', 'step': 2565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.049590', 'step': 2565, 'epoch': 2} {'type': 'loss', 'content': 0.0023908796720206738, 'timestamp': '2025-09-10 02:28:39.051192', 'step': 2566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.079745', 'step': 2566, 'epoch': 2} {'type': 'loss', 'content': 0.007320891600102186, 'timestamp': '2025-09-10 02:28:39.081257', 'step': 2567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.110070', 'step': 2567, 'epoch': 2} {'type': 'loss', 'content': 0.006848949007689953, 'timestamp': '2025-09-10 02:28:39.133209', 'step': 2568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.161759', 'step': 2568, 'epoch': 2} {'type': 'loss', 'content': 0.004966162610799074, 'timestamp': '2025-09-10 02:28:39.163564', 'step': 2569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.192309', 'step': 2569, 'epoch': 2} {'type': 'loss', 'content': 0.0005787332192994654, 'timestamp': '2025-09-10 02:28:39.193984', 'step': 2570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.222918', 'step': 2570, 'epoch': 2} {'type': 'loss', 'content': 0.01637382246553898, 'timestamp': '2025-09-10 02:28:39.224654', 'step': 2571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.253226', 'step': 2571, 'epoch': 2} {'type': 'loss', 'content': 0.016918400302529335, 'timestamp': '2025-09-10 02:28:39.276688', 'step': 2572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.305813', 'step': 2572, 'epoch': 2} {'type': 'loss', 'content': 0.018513483926653862, 'timestamp': '2025-09-10 02:28:39.307489', 'step': 2573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.336169', 'step': 2573, 'epoch': 2} {'type': 'loss', 'content': 0.007836680859327316, 'timestamp': '2025-09-10 02:28:39.337775', 'step': 2574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:39.366291', 'step': 2574, 'epoch': 2} {'type': 'loss', 'content': 0.012387419119477272, 'timestamp': '2025-09-10 02:28:39.367963', 'step': 2575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.396554', 'step': 2575, 'epoch': 2} {'type': 'loss', 'content': 0.006354599259793758, 'timestamp': '2025-09-10 02:28:39.419660', 'step': 2576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.448347', 'step': 2576, 'epoch': 2} {'type': 'loss', 'content': 0.0026027504354715347, 'timestamp': '2025-09-10 02:28:39.450156', 'step': 2577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.478869', 'step': 2577, 'epoch': 2} {'type': 'loss', 'content': 0.0239602942019701, 'timestamp': '2025-09-10 02:28:39.480543', 'step': 2578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.509059', 'step': 2578, 'epoch': 2} {'type': 'loss', 'content': 0.0015381601406261325, 'timestamp': '2025-09-10 02:28:39.511840', 'step': 2579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.541531', 'step': 2579, 'epoch': 2} {'type': 'loss', 'content': 0.032466478645801544, 'timestamp': '2025-09-10 02:28:39.564925', 'step': 2580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.594126', 'step': 2580, 'epoch': 2} {'type': 'loss', 'content': 0.009972876869142056, 'timestamp': '2025-09-10 02:28:39.595801', 'step': 2581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.624394', 'step': 2581, 'epoch': 2} {'type': 'loss', 'content': 0.01930398680269718, 'timestamp': '2025-09-10 02:28:39.626111', 'step': 2582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.655706', 'step': 2582, 'epoch': 2} {'type': 'loss', 'content': 0.010050845332443714, 'timestamp': '2025-09-10 02:28:39.657625', 'step': 2583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:39.685981', 'step': 2583, 'epoch': 2} {'type': 'loss', 'content': 0.011527531780302525, 'timestamp': '2025-09-10 02:28:39.709309', 'step': 2584, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:28:41.597481', 'step': 2584, 'epoch': 2} {'type': 'pplx', 'content': 2603673.316314668, 'timestamp': '2025-09-10 02:28:41.608467', 'step': 2584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:41.638879', 'step': 2584, 'epoch': 2} {'type': 'loss', 'content': 0.0049915858544409275, 'timestamp': '2025-09-10 02:28:41.647162', 'step': 2585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:41.685707', 'step': 2585, 'epoch': 2} {'type': 'loss', 'content': 0.0036541339941322803, 'timestamp': '2025-09-10 02:28:41.687624', 'step': 2586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:41.737483', 'step': 2586, 'epoch': 2} {'type': 'loss', 'content': 0.011626863852143288, 'timestamp': '2025-09-10 02:28:41.739392', 'step': 2587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:41.768215', 'step': 2587, 'epoch': 2} {'type': 'loss', 'content': 0.00713756587356329, 'timestamp': '2025-09-10 02:28:41.791820', 'step': 2588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:41.821547', 'step': 2588, 'epoch': 2} {'type': 'loss', 'content': 0.03513413295149803, 'timestamp': '2025-09-10 02:28:41.823161', 'step': 2589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:41.851579', 'step': 2589, 'epoch': 2} {'type': 'loss', 'content': 0.03290729597210884, 'timestamp': '2025-09-10 02:28:41.853235', 'step': 2590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:41.887869', 'step': 2590, 'epoch': 2} {'type': 'loss', 'content': 0.011574178002774715, 'timestamp': '2025-09-10 02:28:41.889489', 'step': 2591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:41.926275', 'step': 2591, 'epoch': 2} {'type': 'loss', 'content': 0.03582843765616417, 'timestamp': '2025-09-10 02:28:41.949708', 'step': 2592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:41.979412', 'step': 2592, 'epoch': 2} {'type': 'loss', 'content': 0.007640053052455187, 'timestamp': '2025-09-10 02:28:41.981192', 'step': 2593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:42.010940', 'step': 2593, 'epoch': 2} {'type': 'loss', 'content': 0.0010189699241891503, 'timestamp': '2025-09-10 02:28:42.013409', 'step': 2594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.043256', 'step': 2594, 'epoch': 2} {'type': 'loss', 'content': 0.01183264423161745, 'timestamp': '2025-09-10 02:28:42.046844', 'step': 2595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.076579', 'step': 2595, 'epoch': 2} {'type': 'loss', 'content': 0.024438226595520973, 'timestamp': '2025-09-10 02:28:42.099875', 'step': 2596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.128732', 'step': 2596, 'epoch': 2} {'type': 'loss', 'content': 0.0017751975683495402, 'timestamp': '2025-09-10 02:28:42.130670', 'step': 2597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.158966', 'step': 2597, 'epoch': 2} {'type': 'loss', 'content': 0.009779882617294788, 'timestamp': '2025-09-10 02:28:42.160781', 'step': 2598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.189417', 'step': 2598, 'epoch': 2} {'type': 'loss', 'content': 0.010804369114339352, 'timestamp': '2025-09-10 02:28:42.191848', 'step': 2599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.220605', 'step': 2599, 'epoch': 2} {'type': 'loss', 'content': 0.008427734486758709, 'timestamp': '2025-09-10 02:28:42.243693', 'step': 2600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:42.278359', 'step': 2600, 'epoch': 2} {'type': 'loss', 'content': 0.009223179891705513, 'timestamp': '2025-09-10 02:28:42.280435', 'step': 2601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:42.309506', 'step': 2601, 'epoch': 2} {'type': 'loss', 'content': 0.006789051927626133, 'timestamp': '2025-09-10 02:28:42.311319', 'step': 2602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.340236', 'step': 2602, 'epoch': 2} {'type': 'loss', 'content': 0.013294040225446224, 'timestamp': '2025-09-10 02:28:42.342554', 'step': 2603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:42.374724', 'step': 2603, 'epoch': 2} {'type': 'loss', 'content': 0.01670178398489952, 'timestamp': '2025-09-10 02:28:42.398146', 'step': 2604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:42.426579', 'step': 2604, 'epoch': 2} {'type': 'loss', 'content': 0.04589006304740906, 'timestamp': '2025-09-10 02:28:42.428103', 'step': 2605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.461572', 'step': 2605, 'epoch': 2} {'type': 'loss', 'content': 0.005605275277048349, 'timestamp': '2025-09-10 02:28:42.467525', 'step': 2606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.504863', 'step': 2606, 'epoch': 2} {'type': 'loss', 'content': 0.015701550990343094, 'timestamp': '2025-09-10 02:28:42.507084', 'step': 2607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:42.540207', 'step': 2607, 'epoch': 2} {'type': 'loss', 'content': 0.0018044470343738794, 'timestamp': '2025-09-10 02:28:42.563739', 'step': 2608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.592598', 'step': 2608, 'epoch': 2} {'type': 'loss', 'content': 0.033574432134628296, 'timestamp': '2025-09-10 02:28:42.594103', 'step': 2609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.622566', 'step': 2609, 'epoch': 2} {'type': 'loss', 'content': 0.001270159031264484, 'timestamp': '2025-09-10 02:28:42.624089', 'step': 2610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.652470', 'step': 2610, 'epoch': 2} {'type': 'loss', 'content': 0.012809211388230324, 'timestamp': '2025-09-10 02:28:42.654135', 'step': 2611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.683895', 'step': 2611, 'epoch': 2} {'type': 'loss', 'content': 0.0007309060310944915, 'timestamp': '2025-09-10 02:28:42.713356', 'step': 2612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.746101', 'step': 2612, 'epoch': 2} {'type': 'loss', 'content': 0.010813341476023197, 'timestamp': '2025-09-10 02:28:42.748359', 'step': 2613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.777580', 'step': 2613, 'epoch': 2} {'type': 'loss', 'content': 0.0004470552667044103, 'timestamp': '2025-09-10 02:28:42.783260', 'step': 2614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.815170', 'step': 2614, 'epoch': 2} {'type': 'loss', 'content': 0.0004041774955112487, 'timestamp': '2025-09-10 02:28:42.818957', 'step': 2615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.848473', 'step': 2615, 'epoch': 2} {'type': 'loss', 'content': 0.008302075788378716, 'timestamp': '2025-09-10 02:28:42.871884', 'step': 2616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:42.902214', 'step': 2616, 'epoch': 2} {'type': 'loss', 'content': 0.0021110589150339365, 'timestamp': '2025-09-10 02:28:42.905727', 'step': 2617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.934877', 'step': 2617, 'epoch': 2} {'type': 'loss', 'content': 0.019456753507256508, 'timestamp': '2025-09-10 02:28:42.936953', 'step': 2618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:42.975995', 'step': 2618, 'epoch': 2} {'type': 'loss', 'content': 0.006132456008344889, 'timestamp': '2025-09-10 02:28:42.978973', 'step': 2619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:43.009909', 'step': 2619, 'epoch': 2} {'type': 'loss', 'content': 0.024464385583996773, 'timestamp': '2025-09-10 02:28:43.036750', 'step': 2620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.076475', 'step': 2620, 'epoch': 2} {'type': 'loss', 'content': 0.017063161358237267, 'timestamp': '2025-09-10 02:28:43.078983', 'step': 2621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.111287', 'step': 2621, 'epoch': 2} {'type': 'loss', 'content': 0.002038109814748168, 'timestamp': '2025-09-10 02:28:43.116146', 'step': 2622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.145537', 'step': 2622, 'epoch': 2} {'type': 'loss', 'content': 0.0004370710230432451, 'timestamp': '2025-09-10 02:28:43.148788', 'step': 2623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.183874', 'step': 2623, 'epoch': 2} {'type': 'loss', 'content': 0.039566520601511, 'timestamp': '2025-09-10 02:28:43.210745', 'step': 2624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.243277', 'step': 2624, 'epoch': 2} {'type': 'loss', 'content': 0.0052605862729251385, 'timestamp': '2025-09-10 02:28:43.245879', 'step': 2625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.280556', 'step': 2625, 'epoch': 2} {'type': 'loss', 'content': 0.008730943314731121, 'timestamp': '2025-09-10 02:28:43.282263', 'step': 2626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.312425', 'step': 2626, 'epoch': 2} {'type': 'loss', 'content': 0.040866997092962265, 'timestamp': '2025-09-10 02:28:43.318297', 'step': 2627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.358041', 'step': 2627, 'epoch': 2} {'type': 'loss', 'content': 0.0032515223138034344, 'timestamp': '2025-09-10 02:28:43.386299', 'step': 2628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.420448', 'step': 2628, 'epoch': 2} {'type': 'loss', 'content': 0.04348122701048851, 'timestamp': '2025-09-10 02:28:43.426300', 'step': 2629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.463200', 'step': 2629, 'epoch': 2} {'type': 'loss', 'content': 0.014161914587020874, 'timestamp': '2025-09-10 02:28:43.465157', 'step': 2630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.495635', 'step': 2630, 'epoch': 2} {'type': 'loss', 'content': 0.023532714694738388, 'timestamp': '2025-09-10 02:28:43.497661', 'step': 2631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.527643', 'step': 2631, 'epoch': 2} {'type': 'loss', 'content': 0.058165520429611206, 'timestamp': '2025-09-10 02:28:43.551057', 'step': 2632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:43.581064', 'step': 2632, 'epoch': 2} {'type': 'loss', 'content': 0.006530240178108215, 'timestamp': '2025-09-10 02:28:43.582932', 'step': 2633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.613070', 'step': 2633, 'epoch': 2} {'type': 'loss', 'content': 0.03045285865664482, 'timestamp': '2025-09-10 02:28:43.614841', 'step': 2634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.646399', 'step': 2634, 'epoch': 2} {'type': 'loss', 'content': 0.06831801682710648, 'timestamp': '2025-09-10 02:28:43.659248', 'step': 2635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.694918', 'step': 2635, 'epoch': 2} {'type': 'loss', 'content': 0.05904409661889076, 'timestamp': '2025-09-10 02:28:43.720778', 'step': 2636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.761703', 'step': 2636, 'epoch': 2} {'type': 'loss', 'content': 0.00957201886922121, 'timestamp': '2025-09-10 02:28:43.765723', 'step': 2637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:43.801316', 'step': 2637, 'epoch': 2} {'type': 'loss', 'content': 0.01836211048066616, 'timestamp': '2025-09-10 02:28:43.803463', 'step': 2638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.833454', 'step': 2638, 'epoch': 2} {'type': 'loss', 'content': 0.028037531301379204, 'timestamp': '2025-09-10 02:28:43.839860', 'step': 2639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.871700', 'step': 2639, 'epoch': 2} {'type': 'loss', 'content': 0.007020160555839539, 'timestamp': '2025-09-10 02:28:43.896266', 'step': 2640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.928024', 'step': 2640, 'epoch': 2} {'type': 'loss', 'content': 0.0023041670210659504, 'timestamp': '2025-09-10 02:28:43.930707', 'step': 2641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.964816', 'step': 2641, 'epoch': 2} {'type': 'loss', 'content': 0.012306435965001583, 'timestamp': '2025-09-10 02:28:43.966575', 'step': 2642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:43.999362', 'step': 2642, 'epoch': 2} {'type': 'loss', 'content': 0.023700138553977013, 'timestamp': '2025-09-10 02:28:44.001229', 'step': 2643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.030000', 'step': 2643, 'epoch': 2} {'type': 'loss', 'content': 0.006072094198316336, 'timestamp': '2025-09-10 02:28:44.057684', 'step': 2644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.089169', 'step': 2644, 'epoch': 2} {'type': 'loss', 'content': 0.0027935353573411703, 'timestamp': '2025-09-10 02:28:44.091396', 'step': 2645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.120788', 'step': 2645, 'epoch': 2} {'type': 'loss', 'content': 0.006508468184620142, 'timestamp': '2025-09-10 02:28:44.122809', 'step': 2646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.154913', 'step': 2646, 'epoch': 2} {'type': 'loss', 'content': 0.04970017448067665, 'timestamp': '2025-09-10 02:28:44.156851', 'step': 2647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.186096', 'step': 2647, 'epoch': 2} {'type': 'loss', 'content': 0.012843841686844826, 'timestamp': '2025-09-10 02:28:44.211472', 'step': 2648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:44.244124', 'step': 2648, 'epoch': 2} {'type': 'loss', 'content': 0.015078941360116005, 'timestamp': '2025-09-10 02:28:44.248137', 'step': 2649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.278048', 'step': 2649, 'epoch': 2} {'type': 'loss', 'content': 0.07857070863246918, 'timestamp': '2025-09-10 02:28:44.279840', 'step': 2650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:44.308643', 'step': 2650, 'epoch': 2} {'type': 'loss', 'content': 0.05733145400881767, 'timestamp': '2025-09-10 02:28:44.318771', 'step': 2651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.357872', 'step': 2651, 'epoch': 2} {'type': 'loss', 'content': 0.005751179065555334, 'timestamp': '2025-09-10 02:28:44.381326', 'step': 2652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.410397', 'step': 2652, 'epoch': 2} {'type': 'loss', 'content': 0.005302275065332651, 'timestamp': '2025-09-10 02:28:44.412235', 'step': 2653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.441003', 'step': 2653, 'epoch': 2} {'type': 'loss', 'content': 0.012114384211599827, 'timestamp': '2025-09-10 02:28:44.442897', 'step': 2654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.471813', 'step': 2654, 'epoch': 2} {'type': 'loss', 'content': 0.030837317928671837, 'timestamp': '2025-09-10 02:28:44.474080', 'step': 2655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.503113', 'step': 2655, 'epoch': 2} {'type': 'loss', 'content': 0.08814512938261032, 'timestamp': '2025-09-10 02:28:44.526592', 'step': 2656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:44.560619', 'step': 2656, 'epoch': 2} {'type': 'loss', 'content': 0.020036103203892708, 'timestamp': '2025-09-10 02:28:44.566262', 'step': 2657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.595571', 'step': 2657, 'epoch': 2} {'type': 'loss', 'content': 0.0062383851036429405, 'timestamp': '2025-09-10 02:28:44.603606', 'step': 2658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.634411', 'step': 2658, 'epoch': 2} {'type': 'loss', 'content': 0.01709858886897564, 'timestamp': '2025-09-10 02:28:44.639660', 'step': 2659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.670717', 'step': 2659, 'epoch': 2} {'type': 'loss', 'content': 0.017863783985376358, 'timestamp': '2025-09-10 02:28:44.694853', 'step': 2660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:44.728640', 'step': 2660, 'epoch': 2} {'type': 'loss', 'content': 0.07673000544309616, 'timestamp': '2025-09-10 02:28:44.730596', 'step': 2661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.759636', 'step': 2661, 'epoch': 2} {'type': 'loss', 'content': 0.007981250993907452, 'timestamp': '2025-09-10 02:28:44.764247', 'step': 2662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:44.799874', 'step': 2662, 'epoch': 2} {'type': 'loss', 'content': 0.05993294715881348, 'timestamp': '2025-09-10 02:28:44.801504', 'step': 2663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.830696', 'step': 2663, 'epoch': 2} {'type': 'loss', 'content': 0.03284836187958717, 'timestamp': '2025-09-10 02:28:44.854047', 'step': 2664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:44.892058', 'step': 2664, 'epoch': 2} {'type': 'loss', 'content': 0.011232273653149605, 'timestamp': '2025-09-10 02:28:44.893793', 'step': 2665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.927935', 'step': 2665, 'epoch': 2} {'type': 'loss', 'content': 0.03330313041806221, 'timestamp': '2025-09-10 02:28:44.930654', 'step': 2666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:44.969140', 'step': 2666, 'epoch': 2} {'type': 'loss', 'content': 0.0025289016775786877, 'timestamp': '2025-09-10 02:28:44.972991', 'step': 2667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:45.005680', 'step': 2667, 'epoch': 2} {'type': 'loss', 'content': 0.018535206094384193, 'timestamp': '2025-09-10 02:28:45.028991', 'step': 2668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.082702', 'step': 2668, 'epoch': 2} {'type': 'loss', 'content': 0.025767965242266655, 'timestamp': '2025-09-10 02:28:45.086962', 'step': 2669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.116305', 'step': 2669, 'epoch': 2} {'type': 'loss', 'content': 0.01067658793181181, 'timestamp': '2025-09-10 02:28:45.118166', 'step': 2670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:45.147155', 'step': 2670, 'epoch': 2} {'type': 'loss', 'content': 0.02955467812716961, 'timestamp': '2025-09-10 02:28:45.148871', 'step': 2671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.177530', 'step': 2671, 'epoch': 2} {'type': 'loss', 'content': 0.0016258393879979849, 'timestamp': '2025-09-10 02:28:45.200842', 'step': 2672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:45.232223', 'step': 2672, 'epoch': 2} {'type': 'loss', 'content': 0.009552685543894768, 'timestamp': '2025-09-10 02:28:45.234110', 'step': 2673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.262919', 'step': 2673, 'epoch': 2} {'type': 'loss', 'content': 0.010288403369486332, 'timestamp': '2025-09-10 02:28:45.264812', 'step': 2674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.295388', 'step': 2674, 'epoch': 2} {'type': 'loss', 'content': 0.05034352466464043, 'timestamp': '2025-09-10 02:28:45.297408', 'step': 2675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.326872', 'step': 2675, 'epoch': 2} {'type': 'loss', 'content': 0.003926003817468882, 'timestamp': '2025-09-10 02:28:45.350413', 'step': 2676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.380821', 'step': 2676, 'epoch': 2} {'type': 'loss', 'content': 0.0008642165921628475, 'timestamp': '2025-09-10 02:28:45.382766', 'step': 2677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.415963', 'step': 2677, 'epoch': 2} {'type': 'loss', 'content': 0.029930176213383675, 'timestamp': '2025-09-10 02:28:45.417815', 'step': 2678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:45.447174', 'step': 2678, 'epoch': 2} {'type': 'loss', 'content': 0.01474265567958355, 'timestamp': '2025-09-10 02:28:45.448930', 'step': 2679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.478405', 'step': 2679, 'epoch': 2} {'type': 'loss', 'content': 0.012907499447464943, 'timestamp': '2025-09-10 02:28:45.501971', 'step': 2680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.534371', 'step': 2680, 'epoch': 2} {'type': 'loss', 'content': 0.039185430854558945, 'timestamp': '2025-09-10 02:28:45.540662', 'step': 2681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.577291', 'step': 2681, 'epoch': 2} {'type': 'loss', 'content': 0.02835225686430931, 'timestamp': '2025-09-10 02:28:45.579395', 'step': 2682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.609186', 'step': 2682, 'epoch': 2} {'type': 'loss', 'content': 0.02721690945327282, 'timestamp': '2025-09-10 02:28:45.612641', 'step': 2683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:45.645024', 'step': 2683, 'epoch': 2} {'type': 'loss', 'content': 0.03995153307914734, 'timestamp': '2025-09-10 02:28:45.669533', 'step': 2684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.698932', 'step': 2684, 'epoch': 2} {'type': 'loss', 'content': 0.028712604194879532, 'timestamp': '2025-09-10 02:28:45.701971', 'step': 2685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.736733', 'step': 2685, 'epoch': 2} {'type': 'loss', 'content': 0.018331315368413925, 'timestamp': '2025-09-10 02:28:45.738577', 'step': 2686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:45.767531', 'step': 2686, 'epoch': 2} {'type': 'loss', 'content': 0.03021402657032013, 'timestamp': '2025-09-10 02:28:45.770332', 'step': 2687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.799580', 'step': 2687, 'epoch': 2} {'type': 'loss', 'content': 0.053654056042432785, 'timestamp': '2025-09-10 02:28:45.823051', 'step': 2688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:45.865111', 'step': 2688, 'epoch': 2} {'type': 'loss', 'content': 0.06579221040010452, 'timestamp': '2025-09-10 02:28:45.871938', 'step': 2689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.902535', 'step': 2689, 'epoch': 2} {'type': 'loss', 'content': 0.004453813191503286, 'timestamp': '2025-09-10 02:28:45.904700', 'step': 2690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.933807', 'step': 2690, 'epoch': 2} {'type': 'loss', 'content': 0.027801888063549995, 'timestamp': '2025-09-10 02:28:45.937495', 'step': 2691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:45.968546', 'step': 2691, 'epoch': 2} {'type': 'loss', 'content': 0.026753781363368034, 'timestamp': '2025-09-10 02:28:45.991887', 'step': 2692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:46.020984', 'step': 2692, 'epoch': 2} {'type': 'loss', 'content': 0.013489528559148312, 'timestamp': '2025-09-10 02:28:46.024261', 'step': 2693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.055378', 'step': 2693, 'epoch': 2} {'type': 'loss', 'content': 0.01014415267854929, 'timestamp': '2025-09-10 02:28:46.057229', 'step': 2694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.086052', 'step': 2694, 'epoch': 2} {'type': 'loss', 'content': 0.01844717375934124, 'timestamp': '2025-09-10 02:28:46.087864', 'step': 2695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.117600', 'step': 2695, 'epoch': 2} {'type': 'loss', 'content': 0.02382180653512478, 'timestamp': '2025-09-10 02:28:46.140928', 'step': 2696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.176893', 'step': 2696, 'epoch': 2} {'type': 'loss', 'content': 0.05853342264890671, 'timestamp': '2025-09-10 02:28:46.178790', 'step': 2697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.210009', 'step': 2697, 'epoch': 2} {'type': 'loss', 'content': 0.014555533416569233, 'timestamp': '2025-09-10 02:28:46.214423', 'step': 2698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.245417', 'step': 2698, 'epoch': 2} {'type': 'loss', 'content': 0.027585254982113838, 'timestamp': '2025-09-10 02:28:46.249265', 'step': 2699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.281344', 'step': 2699, 'epoch': 2} {'type': 'loss', 'content': 0.02432733215391636, 'timestamp': '2025-09-10 02:28:46.304864', 'step': 2700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.336204', 'step': 2700, 'epoch': 2} {'type': 'loss', 'content': 0.018436763435602188, 'timestamp': '2025-09-10 02:28:46.339532', 'step': 2701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:46.381338', 'step': 2701, 'epoch': 2} {'type': 'loss', 'content': 0.005758058279752731, 'timestamp': '2025-09-10 02:28:46.383259', 'step': 2702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.412278', 'step': 2702, 'epoch': 2} {'type': 'loss', 'content': 0.018294544890522957, 'timestamp': '2025-09-10 02:28:46.415349', 'step': 2703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.451384', 'step': 2703, 'epoch': 2} {'type': 'loss', 'content': 0.003365014912560582, 'timestamp': '2025-09-10 02:28:46.475726', 'step': 2704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.510522', 'step': 2704, 'epoch': 2} {'type': 'loss', 'content': 0.025826290249824524, 'timestamp': '2025-09-10 02:28:46.519224', 'step': 2705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.550744', 'step': 2705, 'epoch': 2} {'type': 'loss', 'content': 0.04405367746949196, 'timestamp': '2025-09-10 02:28:46.552855', 'step': 2706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.581725', 'step': 2706, 'epoch': 2} {'type': 'loss', 'content': 0.013567561283707619, 'timestamp': '2025-09-10 02:28:46.583336', 'step': 2707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.618383', 'step': 2707, 'epoch': 2} {'type': 'loss', 'content': 0.005779425147920847, 'timestamp': '2025-09-10 02:28:46.641726', 'step': 2708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:46.670826', 'step': 2708, 'epoch': 2} {'type': 'loss', 'content': 0.007377541624009609, 'timestamp': '2025-09-10 02:28:46.676004', 'step': 2709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.706217', 'step': 2709, 'epoch': 2} {'type': 'loss', 'content': 0.019734172150492668, 'timestamp': '2025-09-10 02:28:46.710656', 'step': 2710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.742635', 'step': 2710, 'epoch': 2} {'type': 'loss', 'content': 0.03871343284845352, 'timestamp': '2025-09-10 02:28:46.747148', 'step': 2711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:46.783716', 'step': 2711, 'epoch': 2} {'type': 'loss', 'content': 0.00602757278829813, 'timestamp': '2025-09-10 02:28:46.807334', 'step': 2712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:46.839400', 'step': 2712, 'epoch': 2} {'type': 'loss', 'content': 0.07382923364639282, 'timestamp': '2025-09-10 02:28:46.841522', 'step': 2713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.871126', 'step': 2713, 'epoch': 2} {'type': 'loss', 'content': 0.021069372072815895, 'timestamp': '2025-09-10 02:28:46.875169', 'step': 2714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:46.910015', 'step': 2714, 'epoch': 2} {'type': 'loss', 'content': 0.0741237998008728, 'timestamp': '2025-09-10 02:28:46.911900', 'step': 2715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:46.941995', 'step': 2715, 'epoch': 2} {'type': 'loss', 'content': 0.014232861809432507, 'timestamp': '2025-09-10 02:28:46.966506', 'step': 2716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.009606', 'step': 2716, 'epoch': 2} {'type': 'loss', 'content': 0.00317375804297626, 'timestamp': '2025-09-10 02:28:47.014529', 'step': 2717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.047077', 'step': 2717, 'epoch': 2} {'type': 'loss', 'content': 0.06315583735704422, 'timestamp': '2025-09-10 02:28:47.055060', 'step': 2718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.085452', 'step': 2718, 'epoch': 2} {'type': 'loss', 'content': 0.02573302760720253, 'timestamp': '2025-09-10 02:28:47.087579', 'step': 2719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.116638', 'step': 2719, 'epoch': 2} {'type': 'loss', 'content': 0.03362132981419563, 'timestamp': '2025-09-10 02:28:47.140382', 'step': 2720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:47.174223', 'step': 2720, 'epoch': 2} {'type': 'loss', 'content': 0.029691146686673164, 'timestamp': '2025-09-10 02:28:47.176187', 'step': 2721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.207163', 'step': 2721, 'epoch': 2} {'type': 'loss', 'content': 0.00339910970069468, 'timestamp': '2025-09-10 02:28:47.212551', 'step': 2722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.246462', 'step': 2722, 'epoch': 2} {'type': 'loss', 'content': 0.0015478292480111122, 'timestamp': '2025-09-10 02:28:47.248406', 'step': 2723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.277625', 'step': 2723, 'epoch': 2} {'type': 'loss', 'content': 0.0221834909170866, 'timestamp': '2025-09-10 02:28:47.301308', 'step': 2724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.330698', 'step': 2724, 'epoch': 2} {'type': 'loss', 'content': 0.025360483676195145, 'timestamp': '2025-09-10 02:28:47.332558', 'step': 2725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.361448', 'step': 2725, 'epoch': 2} {'type': 'loss', 'content': 0.026857640594244003, 'timestamp': '2025-09-10 02:28:47.363223', 'step': 2726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.392422', 'step': 2726, 'epoch': 2} {'type': 'loss', 'content': 0.011470100842416286, 'timestamp': '2025-09-10 02:28:47.394904', 'step': 2727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.424160', 'step': 2727, 'epoch': 2} {'type': 'loss', 'content': 0.00516451196745038, 'timestamp': '2025-09-10 02:28:47.448964', 'step': 2728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.477639', 'step': 2728, 'epoch': 2} {'type': 'loss', 'content': 0.016428111121058464, 'timestamp': '2025-09-10 02:28:47.479460', 'step': 2729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.508399', 'step': 2729, 'epoch': 2} {'type': 'loss', 'content': 0.04734635725617409, 'timestamp': '2025-09-10 02:28:47.513125', 'step': 2730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:47.549549', 'step': 2730, 'epoch': 2} {'type': 'loss', 'content': 0.02643342688679695, 'timestamp': '2025-09-10 02:28:47.554239', 'step': 2731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.592227', 'step': 2731, 'epoch': 2} {'type': 'loss', 'content': 0.02251642383635044, 'timestamp': '2025-09-10 02:28:47.615449', 'step': 2732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.646933', 'step': 2732, 'epoch': 2} {'type': 'loss', 'content': 0.014357469975948334, 'timestamp': '2025-09-10 02:28:47.655428', 'step': 2733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.686078', 'step': 2733, 'epoch': 2} {'type': 'loss', 'content': 0.0158222708851099, 'timestamp': '2025-09-10 02:28:47.688788', 'step': 2734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.718221', 'step': 2734, 'epoch': 2} {'type': 'loss', 'content': 0.0061955139972269535, 'timestamp': '2025-09-10 02:28:47.720457', 'step': 2735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:47.749479', 'step': 2735, 'epoch': 2} {'type': 'loss', 'content': 0.006888777483254671, 'timestamp': '2025-09-10 02:28:47.773040', 'step': 2736, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:28:49.698666', 'step': 2736, 'epoch': 2} {'type': 'pplx', 'content': 2556436.4143981044, 'timestamp': '2025-09-10 02:28:49.700535', 'step': 2736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.728274', 'step': 2736, 'epoch': 2} {'type': 'loss', 'content': 0.0028547674883157015, 'timestamp': '2025-09-10 02:28:49.730163', 'step': 2737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.759271', 'step': 2737, 'epoch': 2} {'type': 'loss', 'content': 0.013122498989105225, 'timestamp': '2025-09-10 02:28:49.761114', 'step': 2738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.790063', 'step': 2738, 'epoch': 2} {'type': 'loss', 'content': 0.02464980073273182, 'timestamp': '2025-09-10 02:28:49.791883', 'step': 2739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.820976', 'step': 2739, 'epoch': 2} {'type': 'loss', 'content': 0.004556159023195505, 'timestamp': '2025-09-10 02:28:49.844478', 'step': 2740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.873459', 'step': 2740, 'epoch': 2} {'type': 'loss', 'content': 0.04443352296948433, 'timestamp': '2025-09-10 02:28:49.875229', 'step': 2741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.904687', 'step': 2741, 'epoch': 2} {'type': 'loss', 'content': 0.067392498254776, 'timestamp': '2025-09-10 02:28:49.906918', 'step': 2742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.936338', 'step': 2742, 'epoch': 2} {'type': 'loss', 'content': 0.003829964669421315, 'timestamp': '2025-09-10 02:28:49.938339', 'step': 2743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:49.967572', 'step': 2743, 'epoch': 2} {'type': 'loss', 'content': 0.0066711571998894215, 'timestamp': '2025-09-10 02:28:49.991227', 'step': 2744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:50.020217', 'step': 2744, 'epoch': 2} {'type': 'loss', 'content': 0.01632828451693058, 'timestamp': '2025-09-10 02:28:50.022102', 'step': 2745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.051172', 'step': 2745, 'epoch': 2} {'type': 'loss', 'content': 0.02845684252679348, 'timestamp': '2025-09-10 02:28:50.053060', 'step': 2746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:50.082236', 'step': 2746, 'epoch': 2} {'type': 'loss', 'content': 0.026504751294851303, 'timestamp': '2025-09-10 02:28:50.084024', 'step': 2747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.112882', 'step': 2747, 'epoch': 2} {'type': 'loss', 'content': 0.006421736441552639, 'timestamp': '2025-09-10 02:28:50.136319', 'step': 2748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.165335', 'step': 2748, 'epoch': 2} {'type': 'loss', 'content': 0.024044236168265343, 'timestamp': '2025-09-10 02:28:50.167389', 'step': 2749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:50.196717', 'step': 2749, 'epoch': 2} {'type': 'loss', 'content': 0.0037590719293802977, 'timestamp': '2025-09-10 02:28:50.198603', 'step': 2750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.228009', 'step': 2750, 'epoch': 2} {'type': 'loss', 'content': 0.007875211536884308, 'timestamp': '2025-09-10 02:28:50.230193', 'step': 2751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.259813', 'step': 2751, 'epoch': 2} {'type': 'loss', 'content': 0.01248131226748228, 'timestamp': '2025-09-10 02:28:50.282915', 'step': 2752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:50.312401', 'step': 2752, 'epoch': 2} {'type': 'loss', 'content': 0.013336045667529106, 'timestamp': '2025-09-10 02:28:50.314253', 'step': 2753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.343634', 'step': 2753, 'epoch': 2} {'type': 'loss', 'content': 0.03355873376131058, 'timestamp': '2025-09-10 02:28:50.345549', 'step': 2754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.374450', 'step': 2754, 'epoch': 2} {'type': 'loss', 'content': 0.004387472756206989, 'timestamp': '2025-09-10 02:28:50.376202', 'step': 2755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.404920', 'step': 2755, 'epoch': 2} {'type': 'loss', 'content': 0.0051027326844632626, 'timestamp': '2025-09-10 02:28:50.428117', 'step': 2756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.457182', 'step': 2756, 'epoch': 2} {'type': 'loss', 'content': 0.006544017698615789, 'timestamp': '2025-09-10 02:28:50.459200', 'step': 2757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:50.490368', 'step': 2757, 'epoch': 2} {'type': 'loss', 'content': 0.011391127482056618, 'timestamp': '2025-09-10 02:28:50.492471', 'step': 2758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:50.521862', 'step': 2758, 'epoch': 2} {'type': 'loss', 'content': 0.0009497334249317646, 'timestamp': '2025-09-10 02:28:50.523665', 'step': 2759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.552692', 'step': 2759, 'epoch': 2} {'type': 'loss', 'content': 0.0008985889726318419, 'timestamp': '2025-09-10 02:28:50.576388', 'step': 2760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:50.605634', 'step': 2760, 'epoch': 2} {'type': 'loss', 'content': 0.04270598292350769, 'timestamp': '2025-09-10 02:28:50.607464', 'step': 2761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.636890', 'step': 2761, 'epoch': 2} {'type': 'loss', 'content': 0.053190361708402634, 'timestamp': '2025-09-10 02:28:50.638673', 'step': 2762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.667640', 'step': 2762, 'epoch': 2} {'type': 'loss', 'content': 0.01089306827634573, 'timestamp': '2025-09-10 02:28:50.669449', 'step': 2763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.699134', 'step': 2763, 'epoch': 2} {'type': 'loss', 'content': 0.008394693024456501, 'timestamp': '2025-09-10 02:28:50.722548', 'step': 2764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.751523', 'step': 2764, 'epoch': 2} {'type': 'loss', 'content': 0.06284545361995697, 'timestamp': '2025-09-10 02:28:50.753458', 'step': 2765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.782552', 'step': 2765, 'epoch': 2} {'type': 'loss', 'content': 0.03257061913609505, 'timestamp': '2025-09-10 02:28:50.784475', 'step': 2766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.813339', 'step': 2766, 'epoch': 2} {'type': 'loss', 'content': 0.0028790233191102743, 'timestamp': '2025-09-10 02:28:50.815136', 'step': 2767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:50.844180', 'step': 2767, 'epoch': 2} {'type': 'loss', 'content': 0.00551459938287735, 'timestamp': '2025-09-10 02:28:50.867438', 'step': 2768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.896645', 'step': 2768, 'epoch': 2} {'type': 'loss', 'content': 0.001208897796459496, 'timestamp': '2025-09-10 02:28:50.898545', 'step': 2769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.928898', 'step': 2769, 'epoch': 2} {'type': 'loss', 'content': 0.03900022432208061, 'timestamp': '2025-09-10 02:28:50.930683', 'step': 2770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.959635', 'step': 2770, 'epoch': 2} {'type': 'loss', 'content': 0.03883006051182747, 'timestamp': '2025-09-10 02:28:50.961633', 'step': 2771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:50.991236', 'step': 2771, 'epoch': 2} {'type': 'loss', 'content': 0.03494889289140701, 'timestamp': '2025-09-10 02:28:51.014827', 'step': 2772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.044028', 'step': 2772, 'epoch': 2} {'type': 'loss', 'content': 0.05873110517859459, 'timestamp': '2025-09-10 02:28:51.045937', 'step': 2773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.075609', 'step': 2773, 'epoch': 2} {'type': 'loss', 'content': 0.0463673397898674, 'timestamp': '2025-09-10 02:28:51.077487', 'step': 2774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.106486', 'step': 2774, 'epoch': 2} {'type': 'loss', 'content': 0.034762509167194366, 'timestamp': '2025-09-10 02:28:51.108366', 'step': 2775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:51.137704', 'step': 2775, 'epoch': 2} {'type': 'loss', 'content': 0.0016691697528585792, 'timestamp': '2025-09-10 02:28:51.160974', 'step': 2776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.193222', 'step': 2776, 'epoch': 2} {'type': 'loss', 'content': 0.0006479284493252635, 'timestamp': '2025-09-10 02:28:51.195165', 'step': 2777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.224121', 'step': 2777, 'epoch': 2} {'type': 'loss', 'content': 0.0053579783998429775, 'timestamp': '2025-09-10 02:28:51.226191', 'step': 2778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.255044', 'step': 2778, 'epoch': 2} {'type': 'loss', 'content': 0.019901324063539505, 'timestamp': '2025-09-10 02:28:51.256844', 'step': 2779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.285700', 'step': 2779, 'epoch': 2} {'type': 'loss', 'content': 0.010306313633918762, 'timestamp': '2025-09-10 02:28:51.309289', 'step': 2780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:51.338821', 'step': 2780, 'epoch': 2} {'type': 'loss', 'content': 0.001360267517156899, 'timestamp': '2025-09-10 02:28:51.340616', 'step': 2781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.369701', 'step': 2781, 'epoch': 2} {'type': 'loss', 'content': 0.00952989887446165, 'timestamp': '2025-09-10 02:28:51.371445', 'step': 2782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.400921', 'step': 2782, 'epoch': 2} {'type': 'loss', 'content': 0.007531650364398956, 'timestamp': '2025-09-10 02:28:51.402891', 'step': 2783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.432223', 'step': 2783, 'epoch': 2} {'type': 'loss', 'content': 0.017776241526007652, 'timestamp': '2025-09-10 02:28:51.455671', 'step': 2784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.485004', 'step': 2784, 'epoch': 2} {'type': 'loss', 'content': 0.0023183736484497786, 'timestamp': '2025-09-10 02:28:51.486796', 'step': 2785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.516060', 'step': 2785, 'epoch': 2} {'type': 'loss', 'content': 0.03633001446723938, 'timestamp': '2025-09-10 02:28:51.518085', 'step': 2786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.547137', 'step': 2786, 'epoch': 2} {'type': 'loss', 'content': 0.007016249932348728, 'timestamp': '2025-09-10 02:28:51.549078', 'step': 2787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.578310', 'step': 2787, 'epoch': 2} {'type': 'loss', 'content': 0.01564197801053524, 'timestamp': '2025-09-10 02:28:51.601785', 'step': 2788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.630739', 'step': 2788, 'epoch': 2} {'type': 'loss', 'content': 0.011738909408450127, 'timestamp': '2025-09-10 02:28:51.632499', 'step': 2789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.661477', 'step': 2789, 'epoch': 2} {'type': 'loss', 'content': 0.006292745471000671, 'timestamp': '2025-09-10 02:28:51.663360', 'step': 2790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:51.692973', 'step': 2790, 'epoch': 2} {'type': 'loss', 'content': 0.004651935305446386, 'timestamp': '2025-09-10 02:28:51.694819', 'step': 2791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.723863', 'step': 2791, 'epoch': 2} {'type': 'loss', 'content': 0.051345258951187134, 'timestamp': '2025-09-10 02:28:51.747290', 'step': 2792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.776198', 'step': 2792, 'epoch': 2} {'type': 'loss', 'content': 0.01709223911166191, 'timestamp': '2025-09-10 02:28:51.778441', 'step': 2793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.807343', 'step': 2793, 'epoch': 2} {'type': 'loss', 'content': 0.0029445867985486984, 'timestamp': '2025-09-10 02:28:51.809237', 'step': 2794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.838439', 'step': 2794, 'epoch': 2} {'type': 'loss', 'content': 0.007461313623934984, 'timestamp': '2025-09-10 02:28:51.840478', 'step': 2795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.869521', 'step': 2795, 'epoch': 2} {'type': 'loss', 'content': 0.005373707506805658, 'timestamp': '2025-09-10 02:28:51.892561', 'step': 2796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.922523', 'step': 2796, 'epoch': 2} {'type': 'loss', 'content': 0.0008838448557071388, 'timestamp': '2025-09-10 02:28:51.924271', 'step': 2797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:51.953435', 'step': 2797, 'epoch': 2} {'type': 'loss', 'content': 0.0061692483723163605, 'timestamp': '2025-09-10 02:28:51.955428', 'step': 2798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:51.985341', 'step': 2798, 'epoch': 2} {'type': 'loss', 'content': 0.0168300811201334, 'timestamp': '2025-09-10 02:28:51.987080', 'step': 2799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.016243', 'step': 2799, 'epoch': 2} {'type': 'loss', 'content': 0.0024048867635428905, 'timestamp': '2025-09-10 02:28:52.039957', 'step': 2800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.069875', 'step': 2800, 'epoch': 2} {'type': 'loss', 'content': 0.02803078480064869, 'timestamp': '2025-09-10 02:28:52.071946', 'step': 2801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.101033', 'step': 2801, 'epoch': 2} {'type': 'loss', 'content': 0.013601968996226788, 'timestamp': '2025-09-10 02:28:52.102780', 'step': 2802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.131444', 'step': 2802, 'epoch': 2} {'type': 'loss', 'content': 0.004167321603745222, 'timestamp': '2025-09-10 02:28:52.133360', 'step': 2803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.162566', 'step': 2803, 'epoch': 2} {'type': 'loss', 'content': 0.0028820219449698925, 'timestamp': '2025-09-10 02:28:52.190505', 'step': 2804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:52.220204', 'step': 2804, 'epoch': 2} {'type': 'loss', 'content': 0.01132802851498127, 'timestamp': '2025-09-10 02:28:52.222117', 'step': 2805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.253631', 'step': 2805, 'epoch': 2} {'type': 'loss', 'content': 0.027890155091881752, 'timestamp': '2025-09-10 02:28:52.259984', 'step': 2806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.292024', 'step': 2806, 'epoch': 2} {'type': 'loss', 'content': 0.03422095999121666, 'timestamp': '2025-09-10 02:28:52.293826', 'step': 2807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.322801', 'step': 2807, 'epoch': 2} {'type': 'loss', 'content': 0.006916854064911604, 'timestamp': '2025-09-10 02:28:52.346028', 'step': 2808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.374833', 'step': 2808, 'epoch': 2} {'type': 'loss', 'content': 0.001855450333096087, 'timestamp': '2025-09-10 02:28:52.376856', 'step': 2809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.420502', 'step': 2809, 'epoch': 2} {'type': 'loss', 'content': 0.012419261038303375, 'timestamp': '2025-09-10 02:28:52.422856', 'step': 2810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.463506', 'step': 2810, 'epoch': 2} {'type': 'loss', 'content': 0.00299721397459507, 'timestamp': '2025-09-10 02:28:52.465581', 'step': 2811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.497599', 'step': 2811, 'epoch': 2} {'type': 'loss', 'content': 0.005516386590898037, 'timestamp': '2025-09-10 02:28:52.520804', 'step': 2812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.560451', 'step': 2812, 'epoch': 2} {'type': 'loss', 'content': 0.0005141026340425014, 'timestamp': '2025-09-10 02:28:52.562428', 'step': 2813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.592480', 'step': 2813, 'epoch': 2} {'type': 'loss', 'content': 0.008127505891025066, 'timestamp': '2025-09-10 02:28:52.594323', 'step': 2814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.623636', 'step': 2814, 'epoch': 2} {'type': 'loss', 'content': 0.013286756351590157, 'timestamp': '2025-09-10 02:28:52.625824', 'step': 2815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.670341', 'step': 2815, 'epoch': 2} {'type': 'loss', 'content': 0.022446798160672188, 'timestamp': '2025-09-10 02:28:52.694122', 'step': 2816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.723505', 'step': 2816, 'epoch': 2} {'type': 'loss', 'content': 0.001313581014983356, 'timestamp': '2025-09-10 02:28:52.726877', 'step': 2817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.761927', 'step': 2817, 'epoch': 2} {'type': 'loss', 'content': 0.02478764019906521, 'timestamp': '2025-09-10 02:28:52.763910', 'step': 2818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.792634', 'step': 2818, 'epoch': 2} {'type': 'loss', 'content': 0.006905491929501295, 'timestamp': '2025-09-10 02:28:52.794523', 'step': 2819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.823582', 'step': 2819, 'epoch': 2} {'type': 'loss', 'content': 0.00363927218131721, 'timestamp': '2025-09-10 02:28:52.847129', 'step': 2820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:52.876368', 'step': 2820, 'epoch': 2} {'type': 'loss', 'content': 0.03496801480650902, 'timestamp': '2025-09-10 02:28:52.878756', 'step': 2821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.907614', 'step': 2821, 'epoch': 2} {'type': 'loss', 'content': 0.0012525822967290878, 'timestamp': '2025-09-10 02:28:52.912313', 'step': 2822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:52.944580', 'step': 2822, 'epoch': 2} {'type': 'loss', 'content': 0.018051186576485634, 'timestamp': '2025-09-10 02:28:52.946490', 'step': 2823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:52.975382', 'step': 2823, 'epoch': 2} {'type': 'loss', 'content': 0.03819189593195915, 'timestamp': '2025-09-10 02:28:52.998738', 'step': 2824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.027724', 'step': 2824, 'epoch': 2} {'type': 'loss', 'content': 0.018002361059188843, 'timestamp': '2025-09-10 02:28:53.029454', 'step': 2825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.058133', 'step': 2825, 'epoch': 2} {'type': 'loss', 'content': 0.051839083433151245, 'timestamp': '2025-09-10 02:28:53.063182', 'step': 2826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.093877', 'step': 2826, 'epoch': 2} {'type': 'loss', 'content': 0.08625908941030502, 'timestamp': '2025-09-10 02:28:53.098347', 'step': 2827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.129019', 'step': 2827, 'epoch': 2} {'type': 'loss', 'content': 0.005705358926206827, 'timestamp': '2025-09-10 02:28:53.152525', 'step': 2828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.181545', 'step': 2828, 'epoch': 2} {'type': 'loss', 'content': 0.02136576734483242, 'timestamp': '2025-09-10 02:28:53.184246', 'step': 2829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:53.214375', 'step': 2829, 'epoch': 2} {'type': 'loss', 'content': 0.0008014339837245643, 'timestamp': '2025-09-10 02:28:53.219289', 'step': 2830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.249975', 'step': 2830, 'epoch': 2} {'type': 'loss', 'content': 0.024044450372457504, 'timestamp': '2025-09-10 02:28:53.251842', 'step': 2831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.280824', 'step': 2831, 'epoch': 2} {'type': 'loss', 'content': 0.0004118950746487826, 'timestamp': '2025-09-10 02:28:53.304796', 'step': 2832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.333454', 'step': 2832, 'epoch': 2} {'type': 'loss', 'content': 0.03739207237958908, 'timestamp': '2025-09-10 02:28:53.335852', 'step': 2833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.365560', 'step': 2833, 'epoch': 2} {'type': 'loss', 'content': 0.004224942997097969, 'timestamp': '2025-09-10 02:28:53.367454', 'step': 2834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.396231', 'step': 2834, 'epoch': 2} {'type': 'loss', 'content': 0.07634751498699188, 'timestamp': '2025-09-10 02:28:53.398043', 'step': 2835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.427049', 'step': 2835, 'epoch': 2} {'type': 'loss', 'content': 0.0822276696562767, 'timestamp': '2025-09-10 02:28:53.450585', 'step': 2836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.479761', 'step': 2836, 'epoch': 2} {'type': 'loss', 'content': 0.002085910877212882, 'timestamp': '2025-09-10 02:28:53.481840', 'step': 2837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.510608', 'step': 2837, 'epoch': 2} {'type': 'loss', 'content': 0.0021981713362038136, 'timestamp': '2025-09-10 02:28:53.512503', 'step': 2838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.541146', 'step': 2838, 'epoch': 2} {'type': 'loss', 'content': 0.01698652096092701, 'timestamp': '2025-09-10 02:28:53.542975', 'step': 2839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.571811', 'step': 2839, 'epoch': 2} {'type': 'loss', 'content': 0.03516782820224762, 'timestamp': '2025-09-10 02:28:53.597059', 'step': 2840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:28:53.626348', 'step': 2840, 'epoch': 2} {'type': 'loss', 'content': 0.0384167842566967, 'timestamp': '2025-09-10 02:28:53.628193', 'step': 2841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.657194', 'step': 2841, 'epoch': 2} {'type': 'loss', 'content': 0.0008761967765167356, 'timestamp': '2025-09-10 02:28:53.661970', 'step': 2842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.697734', 'step': 2842, 'epoch': 2} {'type': 'loss', 'content': 0.00977950356900692, 'timestamp': '2025-09-10 02:28:53.699883', 'step': 2843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.728962', 'step': 2843, 'epoch': 2} {'type': 'loss', 'content': 0.012906111776828766, 'timestamp': '2025-09-10 02:28:53.752680', 'step': 2844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.786484', 'step': 2844, 'epoch': 2} {'type': 'loss', 'content': 0.0015678195049986243, 'timestamp': '2025-09-10 02:28:53.790975', 'step': 2845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.820606', 'step': 2845, 'epoch': 2} {'type': 'loss', 'content': 0.032121773809194565, 'timestamp': '2025-09-10 02:28:53.823031', 'step': 2846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:53.852699', 'step': 2846, 'epoch': 2} {'type': 'loss', 'content': 0.035404808819293976, 'timestamp': '2025-09-10 02:28:53.855094', 'step': 2847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.884113', 'step': 2847, 'epoch': 2} {'type': 'loss', 'content': 0.004591009113937616, 'timestamp': '2025-09-10 02:28:53.907504', 'step': 2848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:53.937008', 'step': 2848, 'epoch': 2} {'type': 'loss', 'content': 0.0016618984518572688, 'timestamp': '2025-09-10 02:28:53.938758', 'step': 2849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:53.967481', 'step': 2849, 'epoch': 2} {'type': 'loss', 'content': 0.014219125732779503, 'timestamp': '2025-09-10 02:28:53.969585', 'step': 2850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:53.998685', 'step': 2850, 'epoch': 2} {'type': 'loss', 'content': 0.01862996816635132, 'timestamp': '2025-09-10 02:28:54.000567', 'step': 2851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.029550', 'step': 2851, 'epoch': 2} {'type': 'loss', 'content': 0.005646827165037394, 'timestamp': '2025-09-10 02:28:54.052928', 'step': 2852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.081709', 'step': 2852, 'epoch': 2} {'type': 'loss', 'content': 0.025086741894483566, 'timestamp': '2025-09-10 02:28:54.083550', 'step': 2853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.112364', 'step': 2853, 'epoch': 2} {'type': 'loss', 'content': 0.013008520938456059, 'timestamp': '2025-09-10 02:28:54.114221', 'step': 2854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.142867', 'step': 2854, 'epoch': 2} {'type': 'loss', 'content': 0.005398153327405453, 'timestamp': '2025-09-10 02:28:54.144623', 'step': 2855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.174625', 'step': 2855, 'epoch': 2} {'type': 'loss', 'content': 0.0349435992538929, 'timestamp': '2025-09-10 02:28:54.198068', 'step': 2856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.227002', 'step': 2856, 'epoch': 2} {'type': 'loss', 'content': 0.002373448805883527, 'timestamp': '2025-09-10 02:28:54.228909', 'step': 2857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.257850', 'step': 2857, 'epoch': 2} {'type': 'loss', 'content': 0.026640325784683228, 'timestamp': '2025-09-10 02:28:54.259760', 'step': 2858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.288703', 'step': 2858, 'epoch': 2} {'type': 'loss', 'content': 0.027330685406923294, 'timestamp': '2025-09-10 02:28:54.290778', 'step': 2859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.319697', 'step': 2859, 'epoch': 2} {'type': 'loss', 'content': 0.04181050881743431, 'timestamp': '2025-09-10 02:28:54.343173', 'step': 2860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.372862', 'step': 2860, 'epoch': 2} {'type': 'loss', 'content': 0.0061957393772900105, 'timestamp': '2025-09-10 02:28:54.374605', 'step': 2861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.403631', 'step': 2861, 'epoch': 2} {'type': 'loss', 'content': 0.01754625327885151, 'timestamp': '2025-09-10 02:28:54.405586', 'step': 2862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.434636', 'step': 2862, 'epoch': 2} {'type': 'loss', 'content': 0.01089306827634573, 'timestamp': '2025-09-10 02:28:54.436648', 'step': 2863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.465879', 'step': 2863, 'epoch': 2} {'type': 'loss', 'content': 0.01644907519221306, 'timestamp': '2025-09-10 02:28:54.489900', 'step': 2864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.518950', 'step': 2864, 'epoch': 2} {'type': 'loss', 'content': 0.0014806865947321057, 'timestamp': '2025-09-10 02:28:54.521039', 'step': 2865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.549860', 'step': 2865, 'epoch': 2} {'type': 'loss', 'content': 0.027932481840252876, 'timestamp': '2025-09-10 02:28:54.552522', 'step': 2866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.581594', 'step': 2866, 'epoch': 2} {'type': 'loss', 'content': 0.001444231136702001, 'timestamp': '2025-09-10 02:28:54.583466', 'step': 2867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.612135', 'step': 2867, 'epoch': 2} {'type': 'loss', 'content': 0.001635089167393744, 'timestamp': '2025-09-10 02:28:54.635857', 'step': 2868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.664448', 'step': 2868, 'epoch': 2} {'type': 'loss', 'content': 0.06270123273134232, 'timestamp': '2025-09-10 02:28:54.666257', 'step': 2869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.696875', 'step': 2869, 'epoch': 2} {'type': 'loss', 'content': 0.04237469285726547, 'timestamp': '2025-09-10 02:28:54.698910', 'step': 2870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:54.727912', 'step': 2870, 'epoch': 2} {'type': 'loss', 'content': 0.0055243996903300285, 'timestamp': '2025-09-10 02:28:54.729745', 'step': 2871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.758842', 'step': 2871, 'epoch': 2} {'type': 'loss', 'content': 0.0022270630579441786, 'timestamp': '2025-09-10 02:28:54.782047', 'step': 2872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.811084', 'step': 2872, 'epoch': 2} {'type': 'loss', 'content': 0.015764104202389717, 'timestamp': '2025-09-10 02:28:54.813173', 'step': 2873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.841961', 'step': 2873, 'epoch': 2} {'type': 'loss', 'content': 0.025673216208815575, 'timestamp': '2025-09-10 02:28:54.844034', 'step': 2874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.873356', 'step': 2874, 'epoch': 2} {'type': 'loss', 'content': 0.013369431719183922, 'timestamp': '2025-09-10 02:28:54.875270', 'step': 2875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.904632', 'step': 2875, 'epoch': 2} {'type': 'loss', 'content': 0.0023222228046506643, 'timestamp': '2025-09-10 02:28:54.928025', 'step': 2876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.957186', 'step': 2876, 'epoch': 2} {'type': 'loss', 'content': 0.03977759927511215, 'timestamp': '2025-09-10 02:28:54.959106', 'step': 2877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:54.988156', 'step': 2877, 'epoch': 2} {'type': 'loss', 'content': 0.007196251768618822, 'timestamp': '2025-09-10 02:28:54.990040', 'step': 2878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.018923', 'step': 2878, 'epoch': 2} {'type': 'loss', 'content': 0.004890757147222757, 'timestamp': '2025-09-10 02:28:55.021025', 'step': 2879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:55.050166', 'step': 2879, 'epoch': 2} {'type': 'loss', 'content': 0.02686142362654209, 'timestamp': '2025-09-10 02:28:55.073427', 'step': 2880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.102314', 'step': 2880, 'epoch': 2} {'type': 'loss', 'content': 0.015549045987427235, 'timestamp': '2025-09-10 02:28:55.104168', 'step': 2881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:55.133114', 'step': 2881, 'epoch': 2} {'type': 'loss', 'content': 0.04396245256066322, 'timestamp': '2025-09-10 02:28:55.135004', 'step': 2882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.163934', 'step': 2882, 'epoch': 2} {'type': 'loss', 'content': 0.0018287552520632744, 'timestamp': '2025-09-10 02:28:55.165901', 'step': 2883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.194907', 'step': 2883, 'epoch': 2} {'type': 'loss', 'content': 0.04077062010765076, 'timestamp': '2025-09-10 02:28:55.218384', 'step': 2884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:55.247341', 'step': 2884, 'epoch': 2} {'type': 'loss', 'content': 0.006520784460008144, 'timestamp': '2025-09-10 02:28:55.249020', 'step': 2885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.278048', 'step': 2885, 'epoch': 2} {'type': 'loss', 'content': 0.004114919807761908, 'timestamp': '2025-09-10 02:28:55.280068', 'step': 2886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.309247', 'step': 2886, 'epoch': 2} {'type': 'loss', 'content': 0.004029436502605677, 'timestamp': '2025-09-10 02:28:55.311090', 'step': 2887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:55.339984', 'step': 2887, 'epoch': 2} {'type': 'loss', 'content': 0.030637195333838463, 'timestamp': '2025-09-10 02:28:55.363626', 'step': 2888, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:28:57.281618', 'step': 2888, 'epoch': 2} {'type': 'pplx', 'content': 2362714.2441541348, 'timestamp': '2025-09-10 02:28:57.283737', 'step': 2888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.311992', 'step': 2888, 'epoch': 2} {'type': 'loss', 'content': 0.03481084480881691, 'timestamp': '2025-09-10 02:28:57.314135', 'step': 2889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.343638', 'step': 2889, 'epoch': 2} {'type': 'loss', 'content': 0.009121944196522236, 'timestamp': '2025-09-10 02:28:57.345523', 'step': 2890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.374675', 'step': 2890, 'epoch': 2} {'type': 'loss', 'content': 0.008922411128878593, 'timestamp': '2025-09-10 02:28:57.376281', 'step': 2891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.405502', 'step': 2891, 'epoch': 2} {'type': 'loss', 'content': 0.09262406080961227, 'timestamp': '2025-09-10 02:28:57.429138', 'step': 2892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.458070', 'step': 2892, 'epoch': 2} {'type': 'loss', 'content': 0.004805420991033316, 'timestamp': '2025-09-10 02:28:57.460015', 'step': 2893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.488646', 'step': 2893, 'epoch': 2} {'type': 'loss', 'content': 0.00714575732126832, 'timestamp': '2025-09-10 02:28:57.490465', 'step': 2894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:57.519866', 'step': 2894, 'epoch': 2} {'type': 'loss', 'content': 0.014643056318163872, 'timestamp': '2025-09-10 02:28:57.521833', 'step': 2895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.551982', 'step': 2895, 'epoch': 2} {'type': 'loss', 'content': 0.022045502439141273, 'timestamp': '2025-09-10 02:28:57.575615', 'step': 2896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.604356', 'step': 2896, 'epoch': 2} {'type': 'loss', 'content': 0.016090938821434975, 'timestamp': '2025-09-10 02:28:57.606360', 'step': 2897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.635811', 'step': 2897, 'epoch': 2} {'type': 'loss', 'content': 0.014781218953430653, 'timestamp': '2025-09-10 02:28:57.637628', 'step': 2898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.666486', 'step': 2898, 'epoch': 2} {'type': 'loss', 'content': 0.016501110047101974, 'timestamp': '2025-09-10 02:28:57.668419', 'step': 2899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.697893', 'step': 2899, 'epoch': 2} {'type': 'loss', 'content': 0.0067305476404726505, 'timestamp': '2025-09-10 02:28:57.721428', 'step': 2900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.751126', 'step': 2900, 'epoch': 2} {'type': 'loss', 'content': 0.02997549995779991, 'timestamp': '2025-09-10 02:28:57.752913', 'step': 2901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.781871', 'step': 2901, 'epoch': 2} {'type': 'loss', 'content': 0.03778936341404915, 'timestamp': '2025-09-10 02:28:57.784038', 'step': 2902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.816766', 'step': 2902, 'epoch': 2} {'type': 'loss', 'content': 0.02480734884738922, 'timestamp': '2025-09-10 02:28:57.818623', 'step': 2903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:57.849799', 'step': 2903, 'epoch': 2} {'type': 'loss', 'content': 0.017994388937950134, 'timestamp': '2025-09-10 02:28:57.873118', 'step': 2904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.902255', 'step': 2904, 'epoch': 2} {'type': 'loss', 'content': 0.004165151156485081, 'timestamp': '2025-09-10 02:28:57.904126', 'step': 2905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.933095', 'step': 2905, 'epoch': 2} {'type': 'loss', 'content': 0.0027392476331442595, 'timestamp': '2025-09-10 02:28:57.934997', 'step': 2906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.963955', 'step': 2906, 'epoch': 2} {'type': 'loss', 'content': 0.023687604814767838, 'timestamp': '2025-09-10 02:28:57.965830', 'step': 2907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:57.995014', 'step': 2907, 'epoch': 2} {'type': 'loss', 'content': 0.027018358930945396, 'timestamp': '2025-09-10 02:28:58.018533', 'step': 2908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.048330', 'step': 2908, 'epoch': 2} {'type': 'loss', 'content': 0.007352299056947231, 'timestamp': '2025-09-10 02:28:58.050530', 'step': 2909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.079268', 'step': 2909, 'epoch': 2} {'type': 'loss', 'content': 0.03476090729236603, 'timestamp': '2025-09-10 02:28:58.081186', 'step': 2910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.110181', 'step': 2910, 'epoch': 2} {'type': 'loss', 'content': 0.007333045359700918, 'timestamp': '2025-09-10 02:28:58.112012', 'step': 2911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.140999', 'step': 2911, 'epoch': 2} {'type': 'loss', 'content': 0.017422862350940704, 'timestamp': '2025-09-10 02:28:58.164734', 'step': 2912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.194028', 'step': 2912, 'epoch': 2} {'type': 'loss', 'content': 0.03590952232480049, 'timestamp': '2025-09-10 02:28:58.196305', 'step': 2913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.225173', 'step': 2913, 'epoch': 2} {'type': 'loss', 'content': 0.023626228794455528, 'timestamp': '2025-09-10 02:28:58.226937', 'step': 2914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.256424', 'step': 2914, 'epoch': 2} {'type': 'loss', 'content': 0.010642859153449535, 'timestamp': '2025-09-10 02:28:58.259607', 'step': 2915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:58.288670', 'step': 2915, 'epoch': 2} {'type': 'loss', 'content': 0.05696403607726097, 'timestamp': '2025-09-10 02:28:58.312366', 'step': 2916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.342332', 'step': 2916, 'epoch': 2} {'type': 'loss', 'content': 0.01412131730467081, 'timestamp': '2025-09-10 02:28:58.344070', 'step': 2917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.372904', 'step': 2917, 'epoch': 2} {'type': 'loss', 'content': 0.05304744094610214, 'timestamp': '2025-09-10 02:28:58.374757', 'step': 2918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.404993', 'step': 2918, 'epoch': 2} {'type': 'loss', 'content': 0.01562072616070509, 'timestamp': '2025-09-10 02:28:58.407532', 'step': 2919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.436838', 'step': 2919, 'epoch': 2} {'type': 'loss', 'content': 0.009218626655638218, 'timestamp': '2025-09-10 02:28:58.460085', 'step': 2920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.489459', 'step': 2920, 'epoch': 2} {'type': 'loss', 'content': 0.01481709536164999, 'timestamp': '2025-09-10 02:28:58.491772', 'step': 2921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.521016', 'step': 2921, 'epoch': 2} {'type': 'loss', 'content': 0.0690324604511261, 'timestamp': '2025-09-10 02:28:58.522801', 'step': 2922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.552438', 'step': 2922, 'epoch': 2} {'type': 'loss', 'content': 0.01687428168952465, 'timestamp': '2025-09-10 02:28:58.554350', 'step': 2923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.583720', 'step': 2923, 'epoch': 2} {'type': 'loss', 'content': 0.01327650249004364, 'timestamp': '2025-09-10 02:28:58.607284', 'step': 2924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.637218', 'step': 2924, 'epoch': 2} {'type': 'loss', 'content': 0.06129451468586922, 'timestamp': '2025-09-10 02:28:58.639226', 'step': 2925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.668446', 'step': 2925, 'epoch': 2} {'type': 'loss', 'content': 0.0045118811540305614, 'timestamp': '2025-09-10 02:28:58.670476', 'step': 2926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.700156', 'step': 2926, 'epoch': 2} {'type': 'loss', 'content': 0.021669356152415276, 'timestamp': '2025-09-10 02:28:58.702253', 'step': 2927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.731696', 'step': 2927, 'epoch': 2} {'type': 'loss', 'content': 0.014112145639955997, 'timestamp': '2025-09-10 02:28:58.755099', 'step': 2928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.784355', 'step': 2928, 'epoch': 2} {'type': 'loss', 'content': 0.05611876770853996, 'timestamp': '2025-09-10 02:28:58.786122', 'step': 2929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.816116', 'step': 2929, 'epoch': 2} {'type': 'loss', 'content': 0.03413539007306099, 'timestamp': '2025-09-10 02:28:58.817936', 'step': 2930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.847248', 'step': 2930, 'epoch': 2} {'type': 'loss', 'content': 0.01542122382670641, 'timestamp': '2025-09-10 02:28:58.849742', 'step': 2931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.878696', 'step': 2931, 'epoch': 2} {'type': 'loss', 'content': 0.005345365963876247, 'timestamp': '2025-09-10 02:28:58.902846', 'step': 2932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:58.932166', 'step': 2932, 'epoch': 2} {'type': 'loss', 'content': 0.011457053013145924, 'timestamp': '2025-09-10 02:28:58.934240', 'step': 2933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.963628', 'step': 2933, 'epoch': 2} {'type': 'loss', 'content': 0.014577369205653667, 'timestamp': '2025-09-10 02:28:58.965494', 'step': 2934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:58.994637', 'step': 2934, 'epoch': 2} {'type': 'loss', 'content': 0.007745796348899603, 'timestamp': '2025-09-10 02:28:58.996491', 'step': 2935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.025822', 'step': 2935, 'epoch': 2} {'type': 'loss', 'content': 0.0023972075432538986, 'timestamp': '2025-09-10 02:28:59.049329', 'step': 2936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.078851', 'step': 2936, 'epoch': 2} {'type': 'loss', 'content': 0.001987746451050043, 'timestamp': '2025-09-10 02:28:59.082169', 'step': 2937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.114206', 'step': 2937, 'epoch': 2} {'type': 'loss', 'content': 0.0034970049746334553, 'timestamp': '2025-09-10 02:28:59.116306', 'step': 2938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.145744', 'step': 2938, 'epoch': 2} {'type': 'loss', 'content': 0.03557474911212921, 'timestamp': '2025-09-10 02:28:59.148142', 'step': 2939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.177218', 'step': 2939, 'epoch': 2} {'type': 'loss', 'content': 0.021109571680426598, 'timestamp': '2025-09-10 02:28:59.200736', 'step': 2940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.230277', 'step': 2940, 'epoch': 2} {'type': 'loss', 'content': 0.013695952482521534, 'timestamp': '2025-09-10 02:28:59.232315', 'step': 2941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.262070', 'step': 2941, 'epoch': 2} {'type': 'loss', 'content': 0.009242476895451546, 'timestamp': '2025-09-10 02:28:59.264053', 'step': 2942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.293073', 'step': 2942, 'epoch': 2} {'type': 'loss', 'content': 0.01628132350742817, 'timestamp': '2025-09-10 02:28:59.295009', 'step': 2943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.323915', 'step': 2943, 'epoch': 2} {'type': 'loss', 'content': 0.009609685279428959, 'timestamp': '2025-09-10 02:28:59.347380', 'step': 2944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.376503', 'step': 2944, 'epoch': 2} {'type': 'loss', 'content': 0.0017871072050184011, 'timestamp': '2025-09-10 02:28:59.378538', 'step': 2945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:59.408653', 'step': 2945, 'epoch': 2} {'type': 'loss', 'content': 0.00531261321157217, 'timestamp': '2025-09-10 02:28:59.410629', 'step': 2946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.440273', 'step': 2946, 'epoch': 2} {'type': 'loss', 'content': 0.015613110736012459, 'timestamp': '2025-09-10 02:28:59.442267', 'step': 2947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:28:59.471194', 'step': 2947, 'epoch': 2} {'type': 'loss', 'content': 0.016179624944925308, 'timestamp': '2025-09-10 02:28:59.494597', 'step': 2948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.523863', 'step': 2948, 'epoch': 2} {'type': 'loss', 'content': 0.008063350804150105, 'timestamp': '2025-09-10 02:28:59.525741', 'step': 2949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:59.554807', 'step': 2949, 'epoch': 2} {'type': 'loss', 'content': 0.040376536548137665, 'timestamp': '2025-09-10 02:28:59.556551', 'step': 2950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.585999', 'step': 2950, 'epoch': 2} {'type': 'loss', 'content': 0.016894422471523285, 'timestamp': '2025-09-10 02:28:59.587777', 'step': 2951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.616877', 'step': 2951, 'epoch': 2} {'type': 'loss', 'content': 0.02639775164425373, 'timestamp': '2025-09-10 02:28:59.640323', 'step': 2952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.670306', 'step': 2952, 'epoch': 2} {'type': 'loss', 'content': 0.0056940168142318726, 'timestamp': '2025-09-10 02:28:59.672321', 'step': 2953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.701760', 'step': 2953, 'epoch': 2} {'type': 'loss', 'content': 0.02135222777724266, 'timestamp': '2025-09-10 02:28:59.703797', 'step': 2954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.733696', 'step': 2954, 'epoch': 2} {'type': 'loss', 'content': 0.005273348186165094, 'timestamp': '2025-09-10 02:28:59.735431', 'step': 2955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.764454', 'step': 2955, 'epoch': 2} {'type': 'loss', 'content': 0.011165021918714046, 'timestamp': '2025-09-10 02:28:59.787989', 'step': 2956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:28:59.816765', 'step': 2956, 'epoch': 2} {'type': 'loss', 'content': 0.00407416420057416, 'timestamp': '2025-09-10 02:28:59.818763', 'step': 2957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.848055', 'step': 2957, 'epoch': 2} {'type': 'loss', 'content': 0.004318686667829752, 'timestamp': '2025-09-10 02:28:59.850048', 'step': 2958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.879019', 'step': 2958, 'epoch': 2} {'type': 'loss', 'content': 0.020520392805337906, 'timestamp': '2025-09-10 02:28:59.881020', 'step': 2959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.910116', 'step': 2959, 'epoch': 2} {'type': 'loss', 'content': 0.0023496714420616627, 'timestamp': '2025-09-10 02:28:59.933376', 'step': 2960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.962865', 'step': 2960, 'epoch': 2} {'type': 'loss', 'content': 0.015099072828888893, 'timestamp': '2025-09-10 02:28:59.964777', 'step': 2961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:28:59.993299', 'step': 2961, 'epoch': 2} {'type': 'loss', 'content': 0.014655820094048977, 'timestamp': '2025-09-10 02:28:59.995135', 'step': 2962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.024181', 'step': 2962, 'epoch': 2} {'type': 'loss', 'content': 0.028190674260258675, 'timestamp': '2025-09-10 02:29:00.026418', 'step': 2963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.055704', 'step': 2963, 'epoch': 2} {'type': 'loss', 'content': 0.025873301550745964, 'timestamp': '2025-09-10 02:29:00.079092', 'step': 2964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.108617', 'step': 2964, 'epoch': 2} {'type': 'loss', 'content': 0.04126526787877083, 'timestamp': '2025-09-10 02:29:00.110670', 'step': 2965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:00.140624', 'step': 2965, 'epoch': 2} {'type': 'loss', 'content': 0.027858402580022812, 'timestamp': '2025-09-10 02:29:00.142623', 'step': 2966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:00.172072', 'step': 2966, 'epoch': 2} {'type': 'loss', 'content': 0.012937195599079132, 'timestamp': '2025-09-10 02:29:00.174234', 'step': 2967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.203956', 'step': 2967, 'epoch': 2} {'type': 'loss', 'content': 0.03162816911935806, 'timestamp': '2025-09-10 02:29:00.227503', 'step': 2968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:00.256861', 'step': 2968, 'epoch': 2} {'type': 'loss', 'content': 0.0017554127844050527, 'timestamp': '2025-09-10 02:29:00.259241', 'step': 2969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.288293', 'step': 2969, 'epoch': 2} {'type': 'loss', 'content': 0.002710097935050726, 'timestamp': '2025-09-10 02:29:00.290403', 'step': 2970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.319301', 'step': 2970, 'epoch': 2} {'type': 'loss', 'content': 0.0070211924612522125, 'timestamp': '2025-09-10 02:29:00.321030', 'step': 2971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.350530', 'step': 2971, 'epoch': 2} {'type': 'loss', 'content': 0.0010155083145946264, 'timestamp': '2025-09-10 02:29:00.373793', 'step': 2972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.404322', 'step': 2972, 'epoch': 2} {'type': 'loss', 'content': 0.000555753125809133, 'timestamp': '2025-09-10 02:29:00.406028', 'step': 2973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.435097', 'step': 2973, 'epoch': 2} {'type': 'loss', 'content': 0.013008542358875275, 'timestamp': '2025-09-10 02:29:00.437081', 'step': 2974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.466274', 'step': 2974, 'epoch': 2} {'type': 'loss', 'content': 0.0009575859876349568, 'timestamp': '2025-09-10 02:29:00.468451', 'step': 2975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.498197', 'step': 2975, 'epoch': 2} {'type': 'loss', 'content': 0.0020100839901715517, 'timestamp': '2025-09-10 02:29:00.521708', 'step': 2976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.550664', 'step': 2976, 'epoch': 2} {'type': 'loss', 'content': 0.004602736793458462, 'timestamp': '2025-09-10 02:29:00.552447', 'step': 2977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:00.581350', 'step': 2977, 'epoch': 2} {'type': 'loss', 'content': 0.04919513687491417, 'timestamp': '2025-09-10 02:29:00.583279', 'step': 2978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.612169', 'step': 2978, 'epoch': 2} {'type': 'loss', 'content': 0.0027786530554294586, 'timestamp': '2025-09-10 02:29:00.614178', 'step': 2979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.643009', 'step': 2979, 'epoch': 2} {'type': 'loss', 'content': 0.0006430782377719879, 'timestamp': '2025-09-10 02:29:00.666519', 'step': 2980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.696638', 'step': 2980, 'epoch': 2} {'type': 'loss', 'content': 0.0048445756547153, 'timestamp': '2025-09-10 02:29:00.699090', 'step': 2981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.728313', 'step': 2981, 'epoch': 2} {'type': 'loss', 'content': 0.00392432464286685, 'timestamp': '2025-09-10 02:29:00.730293', 'step': 2982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.760052', 'step': 2982, 'epoch': 2} {'type': 'loss', 'content': 0.006992859300225973, 'timestamp': '2025-09-10 02:29:00.762002', 'step': 2983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.791651', 'step': 2983, 'epoch': 2} {'type': 'loss', 'content': 0.0005739748594351113, 'timestamp': '2025-09-10 02:29:00.815112', 'step': 2984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:00.845039', 'step': 2984, 'epoch': 2} {'type': 'loss', 'content': 0.0016939117340371013, 'timestamp': '2025-09-10 02:29:00.847349', 'step': 2985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.876699', 'step': 2985, 'epoch': 2} {'type': 'loss', 'content': 0.0168430358171463, 'timestamp': '2025-09-10 02:29:00.878675', 'step': 2986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.908493', 'step': 2986, 'epoch': 2} {'type': 'loss', 'content': 0.0028911347035318613, 'timestamp': '2025-09-10 02:29:00.910706', 'step': 2987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.939730', 'step': 2987, 'epoch': 2} {'type': 'loss', 'content': 0.01523431483656168, 'timestamp': '2025-09-10 02:29:00.963297', 'step': 2988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:00.992176', 'step': 2988, 'epoch': 2} {'type': 'loss', 'content': 0.02090136520564556, 'timestamp': '2025-09-10 02:29:00.994124', 'step': 2989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.023036', 'step': 2989, 'epoch': 2} {'type': 'loss', 'content': 0.008178298361599445, 'timestamp': '2025-09-10 02:29:01.024788', 'step': 2990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.053576', 'step': 2990, 'epoch': 2} {'type': 'loss', 'content': 0.016658276319503784, 'timestamp': '2025-09-10 02:29:01.055565', 'step': 2991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.084491', 'step': 2991, 'epoch': 2} {'type': 'loss', 'content': 0.019692067056894302, 'timestamp': '2025-09-10 02:29:01.107689', 'step': 2992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.137271', 'step': 2992, 'epoch': 2} {'type': 'loss', 'content': 0.0005797953344881535, 'timestamp': '2025-09-10 02:29:01.139096', 'step': 2993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.168862', 'step': 2993, 'epoch': 2} {'type': 'loss', 'content': 0.02017974480986595, 'timestamp': '2025-09-10 02:29:01.170885', 'step': 2994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:01.201073', 'step': 2994, 'epoch': 2} {'type': 'loss', 'content': 0.0022124892566353083, 'timestamp': '2025-09-10 02:29:01.202844', 'step': 2995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:01.232607', 'step': 2995, 'epoch': 2} {'type': 'loss', 'content': 0.001253906637430191, 'timestamp': '2025-09-10 02:29:01.256218', 'step': 2996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.285086', 'step': 2996, 'epoch': 2} {'type': 'loss', 'content': 0.010353362187743187, 'timestamp': '2025-09-10 02:29:01.287191', 'step': 2997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.316141', 'step': 2997, 'epoch': 2} {'type': 'loss', 'content': 0.043722331523895264, 'timestamp': '2025-09-10 02:29:01.318267', 'step': 2998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:01.347581', 'step': 2998, 'epoch': 2} {'type': 'loss', 'content': 0.040330640971660614, 'timestamp': '2025-09-10 02:29:01.349813', 'step': 2999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:01.378905', 'step': 2999, 'epoch': 2} {'type': 'loss', 'content': 0.02096661739051342, 'timestamp': '2025-09-10 02:29:01.402402', 'step': 3000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-10 02:29:05.933676', 'step': 3000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:05.968961', 'step': 3000, 'epoch': 2} {'type': 'loss', 'content': 0.004011655226349831, 'timestamp': '2025-09-10 02:29:05.970934', 'step': 3001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:06.000028', 'step': 3001, 'epoch': 2} {'type': 'loss', 'content': 0.023080889135599136, 'timestamp': '2025-09-10 02:29:06.002096', 'step': 3002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.031260', 'step': 3002, 'epoch': 2} {'type': 'loss', 'content': 0.02378918044269085, 'timestamp': '2025-09-10 02:29:06.033269', 'step': 3003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.062713', 'step': 3003, 'epoch': 2} {'type': 'loss', 'content': 0.0014864916447550058, 'timestamp': '2025-09-10 02:29:06.086162', 'step': 3004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:06.116439', 'step': 3004, 'epoch': 2} {'type': 'loss', 'content': 0.0007168581942096353, 'timestamp': '2025-09-10 02:29:06.118404', 'step': 3005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.147550', 'step': 3005, 'epoch': 2} {'type': 'loss', 'content': 0.05206666514277458, 'timestamp': '2025-09-10 02:29:06.149596', 'step': 3006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.178886', 'step': 3006, 'epoch': 2} {'type': 'loss', 'content': 0.019438933581113815, 'timestamp': '2025-09-10 02:29:06.180674', 'step': 3007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.209472', 'step': 3007, 'epoch': 2} {'type': 'loss', 'content': 0.002043353859335184, 'timestamp': '2025-09-10 02:29:06.233082', 'step': 3008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.261783', 'step': 3008, 'epoch': 2} {'type': 'loss', 'content': 0.044223468750715256, 'timestamp': '2025-09-10 02:29:06.263836', 'step': 3009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.292836', 'step': 3009, 'epoch': 2} {'type': 'loss', 'content': 0.0005792967858724296, 'timestamp': '2025-09-10 02:29:06.294508', 'step': 3010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.323380', 'step': 3010, 'epoch': 2} {'type': 'loss', 'content': 0.0050115748308598995, 'timestamp': '2025-09-10 02:29:06.325368', 'step': 3011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.354997', 'step': 3011, 'epoch': 2} {'type': 'loss', 'content': 0.00391251128166914, 'timestamp': '2025-09-10 02:29:06.378556', 'step': 3012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.408100', 'step': 3012, 'epoch': 2} {'type': 'loss', 'content': 0.01394498161971569, 'timestamp': '2025-09-10 02:29:06.409939', 'step': 3013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.438616', 'step': 3013, 'epoch': 2} {'type': 'loss', 'content': 0.003175001125782728, 'timestamp': '2025-09-10 02:29:06.440781', 'step': 3014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.469759', 'step': 3014, 'epoch': 2} {'type': 'loss', 'content': 0.0010710560018196702, 'timestamp': '2025-09-10 02:29:06.471591', 'step': 3015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.500640', 'step': 3015, 'epoch': 2} {'type': 'loss', 'content': 0.013937624171376228, 'timestamp': '2025-09-10 02:29:06.524144', 'step': 3016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:06.553247', 'step': 3016, 'epoch': 2} {'type': 'loss', 'content': 0.000414675276260823, 'timestamp': '2025-09-10 02:29:06.556309', 'step': 3017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.585745', 'step': 3017, 'epoch': 2} {'type': 'loss', 'content': 0.028705088421702385, 'timestamp': '2025-09-10 02:29:06.587848', 'step': 3018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.617161', 'step': 3018, 'epoch': 2} {'type': 'loss', 'content': 0.0010774965630844235, 'timestamp': '2025-09-10 02:29:06.619285', 'step': 3019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.648167', 'step': 3019, 'epoch': 2} {'type': 'loss', 'content': 0.004061760846525431, 'timestamp': '2025-09-10 02:29:06.671618', 'step': 3020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:06.700559', 'step': 3020, 'epoch': 2} {'type': 'loss', 'content': 0.0024131848476827145, 'timestamp': '2025-09-10 02:29:06.702470', 'step': 3021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.731890', 'step': 3021, 'epoch': 2} {'type': 'loss', 'content': 0.0018435295205563307, 'timestamp': '2025-09-10 02:29:06.733913', 'step': 3022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.762739', 'step': 3022, 'epoch': 2} {'type': 'loss', 'content': 0.020958632230758667, 'timestamp': '2025-09-10 02:29:06.764810', 'step': 3023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.793547', 'step': 3023, 'epoch': 2} {'type': 'loss', 'content': 0.014496517367661, 'timestamp': '2025-09-10 02:29:06.816949', 'step': 3024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:06.846274', 'step': 3024, 'epoch': 2} {'type': 'loss', 'content': 0.005536026321351528, 'timestamp': '2025-09-10 02:29:06.848073', 'step': 3025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.876949', 'step': 3025, 'epoch': 2} {'type': 'loss', 'content': 0.0032261740416288376, 'timestamp': '2025-09-10 02:29:06.878988', 'step': 3026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.908360', 'step': 3026, 'epoch': 2} {'type': 'loss', 'content': 0.0034612170420587063, 'timestamp': '2025-09-10 02:29:06.910355', 'step': 3027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.940011', 'step': 3027, 'epoch': 2} {'type': 'loss', 'content': 0.02481548674404621, 'timestamp': '2025-09-10 02:29:06.963543', 'step': 3028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:06.992988', 'step': 3028, 'epoch': 2} {'type': 'loss', 'content': 0.03705863282084465, 'timestamp': '2025-09-10 02:29:06.994845', 'step': 3029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.023372', 'step': 3029, 'epoch': 2} {'type': 'loss', 'content': 0.00185360386967659, 'timestamp': '2025-09-10 02:29:07.025141', 'step': 3030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.054158', 'step': 3030, 'epoch': 2} {'type': 'loss', 'content': 0.008396097458899021, 'timestamp': '2025-09-10 02:29:07.056175', 'step': 3031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.085877', 'step': 3031, 'epoch': 2} {'type': 'loss', 'content': 0.0005369320861063898, 'timestamp': '2025-09-10 02:29:07.109145', 'step': 3032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.137610', 'step': 3032, 'epoch': 2} {'type': 'loss', 'content': 0.023257803171873093, 'timestamp': '2025-09-10 02:29:07.139434', 'step': 3033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:07.168219', 'step': 3033, 'epoch': 2} {'type': 'loss', 'content': 0.0015755105996504426, 'timestamp': '2025-09-10 02:29:07.170023', 'step': 3034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.198916', 'step': 3034, 'epoch': 2} {'type': 'loss', 'content': 0.00027058369596488774, 'timestamp': '2025-09-10 02:29:07.200704', 'step': 3035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.229254', 'step': 3035, 'epoch': 2} {'type': 'loss', 'content': 0.05012970790266991, 'timestamp': '2025-09-10 02:29:07.252556', 'step': 3036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:07.281989', 'step': 3036, 'epoch': 2} {'type': 'loss', 'content': 0.05236015096306801, 'timestamp': '2025-09-10 02:29:07.283906', 'step': 3037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:07.312318', 'step': 3037, 'epoch': 2} {'type': 'loss', 'content': 0.0019132104935124516, 'timestamp': '2025-09-10 02:29:07.314272', 'step': 3038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.343729', 'step': 3038, 'epoch': 2} {'type': 'loss', 'content': 0.00043177817133255303, 'timestamp': '2025-09-10 02:29:07.345838', 'step': 3039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:07.374576', 'step': 3039, 'epoch': 2} {'type': 'loss', 'content': 0.010013881139457226, 'timestamp': '2025-09-10 02:29:07.397892', 'step': 3040, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:29:09.334763', 'step': 3040, 'epoch': 2} {'type': 'pplx', 'content': 2529260.9904143927, 'timestamp': '2025-09-10 02:29:09.336650', 'step': 3040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.364755', 'step': 3040, 'epoch': 2} {'type': 'loss', 'content': 0.017850453034043312, 'timestamp': '2025-09-10 02:29:09.366767', 'step': 3041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:09.396693', 'step': 3041, 'epoch': 2} {'type': 'loss', 'content': 0.0032058064825832844, 'timestamp': '2025-09-10 02:29:09.398667', 'step': 3042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.428534', 'step': 3042, 'epoch': 2} {'type': 'loss', 'content': 0.0006834662635810673, 'timestamp': '2025-09-10 02:29:09.430565', 'step': 3043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.460812', 'step': 3043, 'epoch': 2} {'type': 'loss', 'content': 0.0007365393685176969, 'timestamp': '2025-09-10 02:29:09.484438', 'step': 3044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.513925', 'step': 3044, 'epoch': 2} {'type': 'loss', 'content': 0.06051953509449959, 'timestamp': '2025-09-10 02:29:09.515943', 'step': 3045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:09.544765', 'step': 3045, 'epoch': 2} {'type': 'loss', 'content': 0.009152843616902828, 'timestamp': '2025-09-10 02:29:09.546873', 'step': 3046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.575880', 'step': 3046, 'epoch': 2} {'type': 'loss', 'content': 0.00657705357298255, 'timestamp': '2025-09-10 02:29:09.577795', 'step': 3047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.607046', 'step': 3047, 'epoch': 2} {'type': 'loss', 'content': 0.0009380311821587384, 'timestamp': '2025-09-10 02:29:09.630488', 'step': 3048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.660097', 'step': 3048, 'epoch': 2} {'type': 'loss', 'content': 0.01918032392859459, 'timestamp': '2025-09-10 02:29:09.662208', 'step': 3049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.690970', 'step': 3049, 'epoch': 2} {'type': 'loss', 'content': 0.007777246180921793, 'timestamp': '2025-09-10 02:29:09.693080', 'step': 3050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.722155', 'step': 3050, 'epoch': 2} {'type': 'loss', 'content': 0.005746803712099791, 'timestamp': '2025-09-10 02:29:09.723827', 'step': 3051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.752403', 'step': 3051, 'epoch': 2} {'type': 'loss', 'content': 0.0005835113115608692, 'timestamp': '2025-09-10 02:29:09.775976', 'step': 3052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.805095', 'step': 3052, 'epoch': 2} {'type': 'loss', 'content': 0.047276873141527176, 'timestamp': '2025-09-10 02:29:09.806943', 'step': 3053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.836865', 'step': 3053, 'epoch': 2} {'type': 'loss', 'content': 0.001643635332584381, 'timestamp': '2025-09-10 02:29:09.838670', 'step': 3054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.867705', 'step': 3054, 'epoch': 2} {'type': 'loss', 'content': 0.005624609533697367, 'timestamp': '2025-09-10 02:29:09.869830', 'step': 3055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:09.898985', 'step': 3055, 'epoch': 2} {'type': 'loss', 'content': 0.00035895127803087234, 'timestamp': '2025-09-10 02:29:09.922221', 'step': 3056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:09.955148', 'step': 3056, 'epoch': 2} {'type': 'loss', 'content': 0.001712158671580255, 'timestamp': '2025-09-10 02:29:09.957155', 'step': 3057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:09.986529', 'step': 3057, 'epoch': 2} {'type': 'loss', 'content': 0.010987815447151661, 'timestamp': '2025-09-10 02:29:09.988460', 'step': 3058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:10.017985', 'step': 3058, 'epoch': 2} {'type': 'loss', 'content': 0.006093787960708141, 'timestamp': '2025-09-10 02:29:10.019830', 'step': 3059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.048499', 'step': 3059, 'epoch': 2} {'type': 'loss', 'content': 0.04451434686779976, 'timestamp': '2025-09-10 02:29:10.071966', 'step': 3060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.101624', 'step': 3060, 'epoch': 2} {'type': 'loss', 'content': 0.009569605812430382, 'timestamp': '2025-09-10 02:29:10.103733', 'step': 3061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.132628', 'step': 3061, 'epoch': 2} {'type': 'loss', 'content': 0.01330816000699997, 'timestamp': '2025-09-10 02:29:10.134551', 'step': 3062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.163560', 'step': 3062, 'epoch': 2} {'type': 'loss', 'content': 0.004482160788029432, 'timestamp': '2025-09-10 02:29:10.165285', 'step': 3063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.194604', 'step': 3063, 'epoch': 2} {'type': 'loss', 'content': 0.0006850509089417756, 'timestamp': '2025-09-10 02:29:10.218143', 'step': 3064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.247023', 'step': 3064, 'epoch': 2} {'type': 'loss', 'content': 0.001920659327879548, 'timestamp': '2025-09-10 02:29:10.249046', 'step': 3065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.277988', 'step': 3065, 'epoch': 2} {'type': 'loss', 'content': 0.009923846460878849, 'timestamp': '2025-09-10 02:29:10.279836', 'step': 3066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.308693', 'step': 3066, 'epoch': 2} {'type': 'loss', 'content': 0.007846372202038765, 'timestamp': '2025-09-10 02:29:10.310742', 'step': 3067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.340271', 'step': 3067, 'epoch': 2} {'type': 'loss', 'content': 0.03626176342368126, 'timestamp': '2025-09-10 02:29:10.363868', 'step': 3068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.393584', 'step': 3068, 'epoch': 2} {'type': 'loss', 'content': 0.00045175960985943675, 'timestamp': '2025-09-10 02:29:10.395595', 'step': 3069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.424405', 'step': 3069, 'epoch': 2} {'type': 'loss', 'content': 0.003350152401253581, 'timestamp': '2025-09-10 02:29:10.426402', 'step': 3070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.455862', 'step': 3070, 'epoch': 2} {'type': 'loss', 'content': 0.031238805502653122, 'timestamp': '2025-09-10 02:29:10.457727', 'step': 3071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.486844', 'step': 3071, 'epoch': 2} {'type': 'loss', 'content': 0.006795932073146105, 'timestamp': '2025-09-10 02:29:10.510611', 'step': 3072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.540978', 'step': 3072, 'epoch': 2} {'type': 'loss', 'content': 0.00045838733785785735, 'timestamp': '2025-09-10 02:29:10.543152', 'step': 3073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.572814', 'step': 3073, 'epoch': 2} {'type': 'loss', 'content': 0.06630359590053558, 'timestamp': '2025-09-10 02:29:10.575224', 'step': 3074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.604528', 'step': 3074, 'epoch': 2} {'type': 'loss', 'content': 0.0024545660708099604, 'timestamp': '2025-09-10 02:29:10.606815', 'step': 3075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.636991', 'step': 3075, 'epoch': 2} {'type': 'loss', 'content': 0.001574228866957128, 'timestamp': '2025-09-10 02:29:10.660372', 'step': 3076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.689435', 'step': 3076, 'epoch': 2} {'type': 'loss', 'content': 0.002156176371499896, 'timestamp': '2025-09-10 02:29:10.691461', 'step': 3077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.720906', 'step': 3077, 'epoch': 2} {'type': 'loss', 'content': 0.0014538370305672288, 'timestamp': '2025-09-10 02:29:10.722831', 'step': 3078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.751698', 'step': 3078, 'epoch': 2} {'type': 'loss', 'content': 0.0026938088703900576, 'timestamp': '2025-09-10 02:29:10.753379', 'step': 3079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.782567', 'step': 3079, 'epoch': 2} {'type': 'loss', 'content': 0.0007980852387845516, 'timestamp': '2025-09-10 02:29:10.805950', 'step': 3080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.835722', 'step': 3080, 'epoch': 2} {'type': 'loss', 'content': 0.04152765870094299, 'timestamp': '2025-09-10 02:29:10.837454', 'step': 3081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:10.866182', 'step': 3081, 'epoch': 2} {'type': 'loss', 'content': 0.025887921452522278, 'timestamp': '2025-09-10 02:29:10.868036', 'step': 3082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.896838', 'step': 3082, 'epoch': 2} {'type': 'loss', 'content': 0.03414091467857361, 'timestamp': '2025-09-10 02:29:10.898560', 'step': 3083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:10.927043', 'step': 3083, 'epoch': 2} {'type': 'loss', 'content': 0.01751689985394478, 'timestamp': '2025-09-10 02:29:10.950418', 'step': 3084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:10.979233', 'step': 3084, 'epoch': 2} {'type': 'loss', 'content': 0.002506793709471822, 'timestamp': '2025-09-10 02:29:10.981005', 'step': 3085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.010473', 'step': 3085, 'epoch': 2} {'type': 'loss', 'content': 0.0034624426625669003, 'timestamp': '2025-09-10 02:29:11.012345', 'step': 3086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.041702', 'step': 3086, 'epoch': 2} {'type': 'loss', 'content': 0.003846134291961789, 'timestamp': '2025-09-10 02:29:11.043851', 'step': 3087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:11.073319', 'step': 3087, 'epoch': 2} {'type': 'loss', 'content': 0.001097044674679637, 'timestamp': '2025-09-10 02:29:11.096912', 'step': 3088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.126132', 'step': 3088, 'epoch': 2} {'type': 'loss', 'content': 0.008880463428795338, 'timestamp': '2025-09-10 02:29:11.127791', 'step': 3089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.156288', 'step': 3089, 'epoch': 2} {'type': 'loss', 'content': 0.05466269701719284, 'timestamp': '2025-09-10 02:29:11.158283', 'step': 3090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.188170', 'step': 3090, 'epoch': 2} {'type': 'loss', 'content': 0.00393714802339673, 'timestamp': '2025-09-10 02:29:11.191565', 'step': 3091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.223822', 'step': 3091, 'epoch': 2} {'type': 'loss', 'content': 0.017720578238368034, 'timestamp': '2025-09-10 02:29:11.247372', 'step': 3092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.277390', 'step': 3092, 'epoch': 2} {'type': 'loss', 'content': 0.007730663288384676, 'timestamp': '2025-09-10 02:29:11.279485', 'step': 3093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:11.308856', 'step': 3093, 'epoch': 2} {'type': 'loss', 'content': 0.01206688117235899, 'timestamp': '2025-09-10 02:29:11.311077', 'step': 3094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.340516', 'step': 3094, 'epoch': 2} {'type': 'loss', 'content': 0.006622540298849344, 'timestamp': '2025-09-10 02:29:11.342220', 'step': 3095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.371695', 'step': 3095, 'epoch': 2} {'type': 'loss', 'content': 0.008492736145853996, 'timestamp': '2025-09-10 02:29:11.395377', 'step': 3096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.424970', 'step': 3096, 'epoch': 2} {'type': 'loss', 'content': 0.01140634622424841, 'timestamp': '2025-09-10 02:29:11.426795', 'step': 3097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.455624', 'step': 3097, 'epoch': 2} {'type': 'loss', 'content': 0.025438308715820312, 'timestamp': '2025-09-10 02:29:11.457428', 'step': 3098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.486488', 'step': 3098, 'epoch': 2} {'type': 'loss', 'content': 0.0009789171162992716, 'timestamp': '2025-09-10 02:29:11.488393', 'step': 3099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.517402', 'step': 3099, 'epoch': 2} {'type': 'loss', 'content': 0.0019120399374514818, 'timestamp': '2025-09-10 02:29:11.540522', 'step': 3100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.569700', 'step': 3100, 'epoch': 2} {'type': 'loss', 'content': 0.0009125957149080932, 'timestamp': '2025-09-10 02:29:11.571445', 'step': 3101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:11.600430', 'step': 3101, 'epoch': 2} {'type': 'loss', 'content': 0.03425338864326477, 'timestamp': '2025-09-10 02:29:11.602418', 'step': 3102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.631337', 'step': 3102, 'epoch': 2} {'type': 'loss', 'content': 0.05665156617760658, 'timestamp': '2025-09-10 02:29:11.633280', 'step': 3103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:11.662255', 'step': 3103, 'epoch': 2} {'type': 'loss', 'content': 0.003658808534964919, 'timestamp': '2025-09-10 02:29:11.685879', 'step': 3104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.714811', 'step': 3104, 'epoch': 2} {'type': 'loss', 'content': 0.03745085000991821, 'timestamp': '2025-09-10 02:29:11.716590', 'step': 3105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.745536', 'step': 3105, 'epoch': 2} {'type': 'loss', 'content': 0.0003341633710078895, 'timestamp': '2025-09-10 02:29:11.747307', 'step': 3106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.776425', 'step': 3106, 'epoch': 2} {'type': 'loss', 'content': 0.002007808769121766, 'timestamp': '2025-09-10 02:29:11.778426', 'step': 3107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.807517', 'step': 3107, 'epoch': 2} {'type': 'loss', 'content': 0.005115970969200134, 'timestamp': '2025-09-10 02:29:11.831333', 'step': 3108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.860781', 'step': 3108, 'epoch': 2} {'type': 'loss', 'content': 0.015206390991806984, 'timestamp': '2025-09-10 02:29:11.862417', 'step': 3109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.891718', 'step': 3109, 'epoch': 2} {'type': 'loss', 'content': 0.03465424105525017, 'timestamp': '2025-09-10 02:29:11.893485', 'step': 3110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:11.923847', 'step': 3110, 'epoch': 2} {'type': 'loss', 'content': 0.0011160260764881968, 'timestamp': '2025-09-10 02:29:11.925569', 'step': 3111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:11.954453', 'step': 3111, 'epoch': 2} {'type': 'loss', 'content': 0.005928417202085257, 'timestamp': '2025-09-10 02:29:11.978101', 'step': 3112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.009638', 'step': 3112, 'epoch': 2} {'type': 'loss', 'content': 0.01995454542338848, 'timestamp': '2025-09-10 02:29:12.011592', 'step': 3113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.041226', 'step': 3113, 'epoch': 2} {'type': 'loss', 'content': 0.0028909172397106886, 'timestamp': '2025-09-10 02:29:12.043294', 'step': 3114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.072585', 'step': 3114, 'epoch': 2} {'type': 'loss', 'content': 0.014027678407728672, 'timestamp': '2025-09-10 02:29:12.076082', 'step': 3115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.109868', 'step': 3115, 'epoch': 2} {'type': 'loss', 'content': 0.006129259709268808, 'timestamp': '2025-09-10 02:29:12.133368', 'step': 3116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.166686', 'step': 3116, 'epoch': 2} {'type': 'loss', 'content': 0.0013742366572842002, 'timestamp': '2025-09-10 02:29:12.168508', 'step': 3117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.199439', 'step': 3117, 'epoch': 2} {'type': 'loss', 'content': 0.0012813439825549722, 'timestamp': '2025-09-10 02:29:12.202275', 'step': 3118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.232438', 'step': 3118, 'epoch': 2} {'type': 'loss', 'content': 0.0006782165146432817, 'timestamp': '2025-09-10 02:29:12.234435', 'step': 3119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.269415', 'step': 3119, 'epoch': 2} {'type': 'loss', 'content': 0.004172059241682291, 'timestamp': '2025-09-10 02:29:12.295185', 'step': 3120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.327121', 'step': 3120, 'epoch': 2} {'type': 'loss', 'content': 0.0012155737495049834, 'timestamp': '2025-09-10 02:29:12.328845', 'step': 3121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.357861', 'step': 3121, 'epoch': 2} {'type': 'loss', 'content': 0.02209334261715412, 'timestamp': '2025-09-10 02:29:12.360064', 'step': 3122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.389304', 'step': 3122, 'epoch': 2} {'type': 'loss', 'content': 0.014424113556742668, 'timestamp': '2025-09-10 02:29:12.391307', 'step': 3123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.420607', 'step': 3123, 'epoch': 2} {'type': 'loss', 'content': 0.0037881096359342337, 'timestamp': '2025-09-10 02:29:12.443950', 'step': 3124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.476094', 'step': 3124, 'epoch': 2} {'type': 'loss', 'content': 0.0020416802726686, 'timestamp': '2025-09-10 02:29:12.478484', 'step': 3125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.512506', 'step': 3125, 'epoch': 2} {'type': 'loss', 'content': 0.016164317727088928, 'timestamp': '2025-09-10 02:29:12.514475', 'step': 3126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.544233', 'step': 3126, 'epoch': 2} {'type': 'loss', 'content': 0.025949114933609962, 'timestamp': '2025-09-10 02:29:12.546443', 'step': 3127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.576481', 'step': 3127, 'epoch': 2} {'type': 'loss', 'content': 0.04568003490567207, 'timestamp': '2025-09-10 02:29:12.600043', 'step': 3128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.629595', 'step': 3128, 'epoch': 2} {'type': 'loss', 'content': 0.01625070534646511, 'timestamp': '2025-09-10 02:29:12.636823', 'step': 3129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:12.670142', 'step': 3129, 'epoch': 2} {'type': 'loss', 'content': 0.007189703173935413, 'timestamp': '2025-09-10 02:29:12.671999', 'step': 3130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.700552', 'step': 3130, 'epoch': 2} {'type': 'loss', 'content': 0.00547413295134902, 'timestamp': '2025-09-10 02:29:12.702551', 'step': 3131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.731239', 'step': 3131, 'epoch': 2} {'type': 'loss', 'content': 0.0035093778278678656, 'timestamp': '2025-09-10 02:29:12.757388', 'step': 3132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.786869', 'step': 3132, 'epoch': 2} {'type': 'loss', 'content': 0.002069595968350768, 'timestamp': '2025-09-10 02:29:12.788735', 'step': 3133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.817485', 'step': 3133, 'epoch': 2} {'type': 'loss', 'content': 0.017753830179572105, 'timestamp': '2025-09-10 02:29:12.819442', 'step': 3134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.851231', 'step': 3134, 'epoch': 2} {'type': 'loss', 'content': 0.029736889526247978, 'timestamp': '2025-09-10 02:29:12.852979', 'step': 3135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.881858', 'step': 3135, 'epoch': 2} {'type': 'loss', 'content': 0.002453204710036516, 'timestamp': '2025-09-10 02:29:12.907373', 'step': 3136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:12.936718', 'step': 3136, 'epoch': 2} {'type': 'loss', 'content': 0.005446590483188629, 'timestamp': '2025-09-10 02:29:12.938835', 'step': 3137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:12.969332', 'step': 3137, 'epoch': 2} {'type': 'loss', 'content': 0.022114822641015053, 'timestamp': '2025-09-10 02:29:12.971338', 'step': 3138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.000291', 'step': 3138, 'epoch': 2} {'type': 'loss', 'content': 0.0009241848601959646, 'timestamp': '2025-09-10 02:29:13.002342', 'step': 3139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:13.032016', 'step': 3139, 'epoch': 2} {'type': 'loss', 'content': 0.009086194448173046, 'timestamp': '2025-09-10 02:29:13.055454', 'step': 3140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.085175', 'step': 3140, 'epoch': 2} {'type': 'loss', 'content': 0.0011872841278091073, 'timestamp': '2025-09-10 02:29:13.086856', 'step': 3141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.116138', 'step': 3141, 'epoch': 2} {'type': 'loss', 'content': 0.002385688479989767, 'timestamp': '2025-09-10 02:29:13.117898', 'step': 3142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.147242', 'step': 3142, 'epoch': 2} {'type': 'loss', 'content': 0.009739254601299763, 'timestamp': '2025-09-10 02:29:13.149260', 'step': 3143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.182971', 'step': 3143, 'epoch': 2} {'type': 'loss', 'content': 0.01810370199382305, 'timestamp': '2025-09-10 02:29:13.206462', 'step': 3144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:13.236033', 'step': 3144, 'epoch': 2} {'type': 'loss', 'content': 0.006289140321314335, 'timestamp': '2025-09-10 02:29:13.238060', 'step': 3145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.267260', 'step': 3145, 'epoch': 2} {'type': 'loss', 'content': 0.003966487944126129, 'timestamp': '2025-09-10 02:29:13.269118', 'step': 3146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.298131', 'step': 3146, 'epoch': 2} {'type': 'loss', 'content': 0.005891531240195036, 'timestamp': '2025-09-10 02:29:13.300236', 'step': 3147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.329705', 'step': 3147, 'epoch': 2} {'type': 'loss', 'content': 0.002595615340396762, 'timestamp': '2025-09-10 02:29:13.353317', 'step': 3148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.383691', 'step': 3148, 'epoch': 2} {'type': 'loss', 'content': 0.006208585109561682, 'timestamp': '2025-09-10 02:29:13.387017', 'step': 3149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.417449', 'step': 3149, 'epoch': 2} {'type': 'loss', 'content': 0.01005286630243063, 'timestamp': '2025-09-10 02:29:13.419480', 'step': 3150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.448964', 'step': 3150, 'epoch': 2} {'type': 'loss', 'content': 0.0018498104764148593, 'timestamp': '2025-09-10 02:29:13.451127', 'step': 3151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.481616', 'step': 3151, 'epoch': 2} {'type': 'loss', 'content': 0.0013147999998182058, 'timestamp': '2025-09-10 02:29:13.505335', 'step': 3152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.534788', 'step': 3152, 'epoch': 2} {'type': 'loss', 'content': 0.03542206436395645, 'timestamp': '2025-09-10 02:29:13.538461', 'step': 3153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.567106', 'step': 3153, 'epoch': 2} {'type': 'loss', 'content': 0.003105960553511977, 'timestamp': '2025-09-10 02:29:13.569326', 'step': 3154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.599576', 'step': 3154, 'epoch': 2} {'type': 'loss', 'content': 0.0005990318604744971, 'timestamp': '2025-09-10 02:29:13.601299', 'step': 3155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:13.630475', 'step': 3155, 'epoch': 2} {'type': 'loss', 'content': 0.0057794926688075066, 'timestamp': '2025-09-10 02:29:13.653978', 'step': 3156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:13.687824', 'step': 3156, 'epoch': 2} {'type': 'loss', 'content': 0.007182592060416937, 'timestamp': '2025-09-10 02:29:13.692296', 'step': 3157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.721570', 'step': 3157, 'epoch': 2} {'type': 'loss', 'content': 0.0039300271309912205, 'timestamp': '2025-09-10 02:29:13.723704', 'step': 3158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:13.752248', 'step': 3158, 'epoch': 2} {'type': 'loss', 'content': 0.013558912090957165, 'timestamp': '2025-09-10 02:29:13.756519', 'step': 3159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.785751', 'step': 3159, 'epoch': 2} {'type': 'loss', 'content': 0.00507943332195282, 'timestamp': '2025-09-10 02:29:13.809314', 'step': 3160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.838224', 'step': 3160, 'epoch': 2} {'type': 'loss', 'content': 0.0003966097719967365, 'timestamp': '2025-09-10 02:29:13.841903', 'step': 3161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.873489', 'step': 3161, 'epoch': 2} {'type': 'loss', 'content': 0.0024395675864070654, 'timestamp': '2025-09-10 02:29:13.875363', 'step': 3162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.904415', 'step': 3162, 'epoch': 2} {'type': 'loss', 'content': 0.004043197724968195, 'timestamp': '2025-09-10 02:29:13.906523', 'step': 3163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.935837', 'step': 3163, 'epoch': 2} {'type': 'loss', 'content': 0.002843863097950816, 'timestamp': '2025-09-10 02:29:13.959573', 'step': 3164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:13.989041', 'step': 3164, 'epoch': 2} {'type': 'loss', 'content': 0.037573374807834625, 'timestamp': '2025-09-10 02:29:13.991183', 'step': 3165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.020714', 'step': 3165, 'epoch': 2} {'type': 'loss', 'content': 0.005158807151019573, 'timestamp': '2025-09-10 02:29:14.022916', 'step': 3166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.062017', 'step': 3166, 'epoch': 2} {'type': 'loss', 'content': 0.00035865677637048066, 'timestamp': '2025-09-10 02:29:14.063947', 'step': 3167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.092466', 'step': 3167, 'epoch': 2} {'type': 'loss', 'content': 0.017983097583055496, 'timestamp': '2025-09-10 02:29:14.116099', 'step': 3168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.144899', 'step': 3168, 'epoch': 2} {'type': 'loss', 'content': 0.04602109640836716, 'timestamp': '2025-09-10 02:29:14.146907', 'step': 3169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.175769', 'step': 3169, 'epoch': 2} {'type': 'loss', 'content': 0.0015665811952203512, 'timestamp': '2025-09-10 02:29:14.177484', 'step': 3170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.206385', 'step': 3170, 'epoch': 2} {'type': 'loss', 'content': 0.0035023889504373074, 'timestamp': '2025-09-10 02:29:14.208349', 'step': 3171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.237520', 'step': 3171, 'epoch': 2} {'type': 'loss', 'content': 0.0018404703587293625, 'timestamp': '2025-09-10 02:29:14.262373', 'step': 3172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.291094', 'step': 3172, 'epoch': 2} {'type': 'loss', 'content': 0.01921190693974495, 'timestamp': '2025-09-10 02:29:14.292867', 'step': 3173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:14.321432', 'step': 3173, 'epoch': 2} {'type': 'loss', 'content': 0.005809496622532606, 'timestamp': '2025-09-10 02:29:14.323544', 'step': 3174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.352382', 'step': 3174, 'epoch': 2} {'type': 'loss', 'content': 0.0011441393289715052, 'timestamp': '2025-09-10 02:29:14.354242', 'step': 3175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.383490', 'step': 3175, 'epoch': 2} {'type': 'loss', 'content': 0.0005539747653529048, 'timestamp': '2025-09-10 02:29:14.407176', 'step': 3176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.436374', 'step': 3176, 'epoch': 2} {'type': 'loss', 'content': 0.0010046872776001692, 'timestamp': '2025-09-10 02:29:14.438267', 'step': 3177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.466899', 'step': 3177, 'epoch': 2} {'type': 'loss', 'content': 0.009421751834452152, 'timestamp': '2025-09-10 02:29:14.469011', 'step': 3178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.497943', 'step': 3178, 'epoch': 2} {'type': 'loss', 'content': 0.0006441475707106292, 'timestamp': '2025-09-10 02:29:14.500037', 'step': 3179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.528936', 'step': 3179, 'epoch': 2} {'type': 'loss', 'content': 0.04342738166451454, 'timestamp': '2025-09-10 02:29:14.552663', 'step': 3180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.581745', 'step': 3180, 'epoch': 2} {'type': 'loss', 'content': 0.0749165415763855, 'timestamp': '2025-09-10 02:29:14.584038', 'step': 3181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.613276', 'step': 3181, 'epoch': 2} {'type': 'loss', 'content': 0.0005101025453768671, 'timestamp': '2025-09-10 02:29:14.615323', 'step': 3182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.644711', 'step': 3182, 'epoch': 2} {'type': 'loss', 'content': 0.007324092090129852, 'timestamp': '2025-09-10 02:29:14.646424', 'step': 3183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.675040', 'step': 3183, 'epoch': 2} {'type': 'loss', 'content': 0.06100691482424736, 'timestamp': '2025-09-10 02:29:14.698540', 'step': 3184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.728525', 'step': 3184, 'epoch': 2} {'type': 'loss', 'content': 0.003953658509999514, 'timestamp': '2025-09-10 02:29:14.730418', 'step': 3185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:14.759563', 'step': 3185, 'epoch': 2} {'type': 'loss', 'content': 0.008083908818662167, 'timestamp': '2025-09-10 02:29:14.761682', 'step': 3186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.790780', 'step': 3186, 'epoch': 2} {'type': 'loss', 'content': 0.051983606070280075, 'timestamp': '2025-09-10 02:29:14.792681', 'step': 3187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.821914', 'step': 3187, 'epoch': 2} {'type': 'loss', 'content': 0.0013342405436560512, 'timestamp': '2025-09-10 02:29:14.845667', 'step': 3188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.874837', 'step': 3188, 'epoch': 2} {'type': 'loss', 'content': 0.00035986039438284934, 'timestamp': '2025-09-10 02:29:14.878690', 'step': 3189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.909883', 'step': 3189, 'epoch': 2} {'type': 'loss', 'content': 0.007039155811071396, 'timestamp': '2025-09-10 02:29:14.911590', 'step': 3190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.949537', 'step': 3190, 'epoch': 2} {'type': 'loss', 'content': 0.029422583058476448, 'timestamp': '2025-09-10 02:29:14.951515', 'step': 3191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:14.981321', 'step': 3191, 'epoch': 2} {'type': 'loss', 'content': 0.029646027833223343, 'timestamp': '2025-09-10 02:29:15.004893', 'step': 3192, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:29:16.917711', 'step': 3192, 'epoch': 2} {'type': 'pplx', 'content': 2238869.448983728, 'timestamp': '2025-09-10 02:29:16.919955', 'step': 3192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.948711', 'step': 3192, 'epoch': 2} {'type': 'loss', 'content': 0.0014405797701328993, 'timestamp': '2025-09-10 02:29:16.950660', 'step': 3193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:16.979878', 'step': 3193, 'epoch': 2} {'type': 'loss', 'content': 0.04845903441309929, 'timestamp': '2025-09-10 02:29:16.982010', 'step': 3194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.011306', 'step': 3194, 'epoch': 2} {'type': 'loss', 'content': 0.00040100738988257945, 'timestamp': '2025-09-10 02:29:17.013302', 'step': 3195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.042693', 'step': 3195, 'epoch': 2} {'type': 'loss', 'content': 0.0234959926456213, 'timestamp': '2025-09-10 02:29:17.066436', 'step': 3196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.094820', 'step': 3196, 'epoch': 2} {'type': 'loss', 'content': 0.0010534286266192794, 'timestamp': '2025-09-10 02:29:17.096830', 'step': 3197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.125435', 'step': 3197, 'epoch': 2} {'type': 'loss', 'content': 0.0017616221448406577, 'timestamp': '2025-09-10 02:29:17.127380', 'step': 3198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:17.156707', 'step': 3198, 'epoch': 2} {'type': 'loss', 'content': 0.011382815428078175, 'timestamp': '2025-09-10 02:29:17.158832', 'step': 3199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.188047', 'step': 3199, 'epoch': 2} {'type': 'loss', 'content': 0.009754389524459839, 'timestamp': '2025-09-10 02:29:17.211480', 'step': 3200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.239983', 'step': 3200, 'epoch': 2} {'type': 'loss', 'content': 0.00555665185675025, 'timestamp': '2025-09-10 02:29:17.241751', 'step': 3201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.270341', 'step': 3201, 'epoch': 2} {'type': 'loss', 'content': 0.00267073349095881, 'timestamp': '2025-09-10 02:29:17.272313', 'step': 3202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.301575', 'step': 3202, 'epoch': 2} {'type': 'loss', 'content': 0.004118208773434162, 'timestamp': '2025-09-10 02:29:17.303636', 'step': 3203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.332445', 'step': 3203, 'epoch': 2} {'type': 'loss', 'content': 0.014600671827793121, 'timestamp': '2025-09-10 02:29:17.356133', 'step': 3204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.385121', 'step': 3204, 'epoch': 2} {'type': 'loss', 'content': 0.003516458673402667, 'timestamp': '2025-09-10 02:29:17.388132', 'step': 3205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.419277', 'step': 3205, 'epoch': 2} {'type': 'loss', 'content': 0.007552871946245432, 'timestamp': '2025-09-10 02:29:17.421324', 'step': 3206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.450774', 'step': 3206, 'epoch': 2} {'type': 'loss', 'content': 0.001352613908238709, 'timestamp': '2025-09-10 02:29:17.452703', 'step': 3207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.481562', 'step': 3207, 'epoch': 2} {'type': 'loss', 'content': 0.005524831358343363, 'timestamp': '2025-09-10 02:29:17.505101', 'step': 3208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.534019', 'step': 3208, 'epoch': 2} {'type': 'loss', 'content': 0.0035007346887141466, 'timestamp': '2025-09-10 02:29:17.535912', 'step': 3209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.564767', 'step': 3209, 'epoch': 2} {'type': 'loss', 'content': 0.019017895683646202, 'timestamp': '2025-09-10 02:29:17.566710', 'step': 3210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.595871', 'step': 3210, 'epoch': 2} {'type': 'loss', 'content': 0.000336126220645383, 'timestamp': '2025-09-10 02:29:17.597842', 'step': 3211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.627635', 'step': 3211, 'epoch': 2} {'type': 'loss', 'content': 0.003399396315217018, 'timestamp': '2025-09-10 02:29:17.651049', 'step': 3212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.679881', 'step': 3212, 'epoch': 2} {'type': 'loss', 'content': 0.003790304297581315, 'timestamp': '2025-09-10 02:29:17.681842', 'step': 3213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:17.710718', 'step': 3213, 'epoch': 2} {'type': 'loss', 'content': 0.02069282904267311, 'timestamp': '2025-09-10 02:29:17.712728', 'step': 3214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.741572', 'step': 3214, 'epoch': 2} {'type': 'loss', 'content': 0.054744090884923935, 'timestamp': '2025-09-10 02:29:17.743517', 'step': 3215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.771868', 'step': 3215, 'epoch': 2} {'type': 'loss', 'content': 0.04676416888833046, 'timestamp': '2025-09-10 02:29:17.795407', 'step': 3216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.824307', 'step': 3216, 'epoch': 2} {'type': 'loss', 'content': 0.030757104977965355, 'timestamp': '2025-09-10 02:29:17.826475', 'step': 3217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:17.855866', 'step': 3217, 'epoch': 2} {'type': 'loss', 'content': 0.026507088914513588, 'timestamp': '2025-09-10 02:29:17.857790', 'step': 3218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.886091', 'step': 3218, 'epoch': 2} {'type': 'loss', 'content': 0.008432825095951557, 'timestamp': '2025-09-10 02:29:17.888081', 'step': 3219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.916796', 'step': 3219, 'epoch': 2} {'type': 'loss', 'content': 0.0012788543244823813, 'timestamp': '2025-09-10 02:29:17.940030', 'step': 3220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.969161', 'step': 3220, 'epoch': 2} {'type': 'loss', 'content': 0.001838106312789023, 'timestamp': '2025-09-10 02:29:17.970967', 'step': 3221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:17.999599', 'step': 3221, 'epoch': 2} {'type': 'loss', 'content': 0.009462250396609306, 'timestamp': '2025-09-10 02:29:18.001572', 'step': 3222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.030394', 'step': 3222, 'epoch': 2} {'type': 'loss', 'content': 0.006963053252547979, 'timestamp': '2025-09-10 02:29:18.032590', 'step': 3223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.061360', 'step': 3223, 'epoch': 2} {'type': 'loss', 'content': 0.04160519689321518, 'timestamp': '2025-09-10 02:29:18.085619', 'step': 3224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.115376', 'step': 3224, 'epoch': 2} {'type': 'loss', 'content': 0.037564489990472794, 'timestamp': '2025-09-10 02:29:18.118287', 'step': 3225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.149599', 'step': 3225, 'epoch': 2} {'type': 'loss', 'content': 0.0014786241808906198, 'timestamp': '2025-09-10 02:29:18.151913', 'step': 3226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.180635', 'step': 3226, 'epoch': 2} {'type': 'loss', 'content': 0.041148122400045395, 'timestamp': '2025-09-10 02:29:18.186675', 'step': 3227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:18.218178', 'step': 3227, 'epoch': 2} {'type': 'loss', 'content': 0.026159171015024185, 'timestamp': '2025-09-10 02:29:18.241412', 'step': 3228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:18.272812', 'step': 3228, 'epoch': 2} {'type': 'loss', 'content': 0.005518940277397633, 'timestamp': '2025-09-10 02:29:18.275993', 'step': 3229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.306503', 'step': 3229, 'epoch': 2} {'type': 'loss', 'content': 0.03149283677339554, 'timestamp': '2025-09-10 02:29:18.309682', 'step': 3230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.339620', 'step': 3230, 'epoch': 2} {'type': 'loss', 'content': 0.004583963192999363, 'timestamp': '2025-09-10 02:29:18.341457', 'step': 3231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.374374', 'step': 3231, 'epoch': 2} {'type': 'loss', 'content': 0.002183470642194152, 'timestamp': '2025-09-10 02:29:18.397966', 'step': 3232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:18.429247', 'step': 3232, 'epoch': 2} {'type': 'loss', 'content': 0.005411368329077959, 'timestamp': '2025-09-10 02:29:18.431361', 'step': 3233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.460598', 'step': 3233, 'epoch': 2} {'type': 'loss', 'content': 0.024356532841920853, 'timestamp': '2025-09-10 02:29:18.462430', 'step': 3234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:18.491545', 'step': 3234, 'epoch': 2} {'type': 'loss', 'content': 0.005300631280988455, 'timestamp': '2025-09-10 02:29:18.493598', 'step': 3235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.522878', 'step': 3235, 'epoch': 2} {'type': 'loss', 'content': 0.0028388681821525097, 'timestamp': '2025-09-10 02:29:18.546481', 'step': 3236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.575974', 'step': 3236, 'epoch': 2} {'type': 'loss', 'content': 0.0018885548925027251, 'timestamp': '2025-09-10 02:29:18.577886', 'step': 3237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.606683', 'step': 3237, 'epoch': 2} {'type': 'loss', 'content': 0.0013392126420512795, 'timestamp': '2025-09-10 02:29:18.608355', 'step': 3238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:18.637961', 'step': 3238, 'epoch': 2} {'type': 'loss', 'content': 0.005057408940047026, 'timestamp': '2025-09-10 02:29:18.639796', 'step': 3239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.669231', 'step': 3239, 'epoch': 2} {'type': 'loss', 'content': 0.06259658187627792, 'timestamp': '2025-09-10 02:29:18.693014', 'step': 3240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.726939', 'step': 3240, 'epoch': 2} {'type': 'loss', 'content': 0.002293581375852227, 'timestamp': '2025-09-10 02:29:18.728991', 'step': 3241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:18.763746', 'step': 3241, 'epoch': 2} {'type': 'loss', 'content': 0.002704497892409563, 'timestamp': '2025-09-10 02:29:18.765808', 'step': 3242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.794282', 'step': 3242, 'epoch': 2} {'type': 'loss', 'content': 0.013686344027519226, 'timestamp': '2025-09-10 02:29:18.796458', 'step': 3243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.825726', 'step': 3243, 'epoch': 2} {'type': 'loss', 'content': 0.040091224014759064, 'timestamp': '2025-09-10 02:29:18.849135', 'step': 3244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.878808', 'step': 3244, 'epoch': 2} {'type': 'loss', 'content': 0.027681689709424973, 'timestamp': '2025-09-10 02:29:18.880902', 'step': 3245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.909903', 'step': 3245, 'epoch': 2} {'type': 'loss', 'content': 0.002253680257126689, 'timestamp': '2025-09-10 02:29:18.911965', 'step': 3246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.940435', 'step': 3246, 'epoch': 2} {'type': 'loss', 'content': 0.0012212129076942801, 'timestamp': '2025-09-10 02:29:18.942443', 'step': 3247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:18.971327', 'step': 3247, 'epoch': 2} {'type': 'loss', 'content': 0.016619175672531128, 'timestamp': '2025-09-10 02:29:18.995032', 'step': 3248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.024290', 'step': 3248, 'epoch': 2} {'type': 'loss', 'content': 0.006138200405985117, 'timestamp': '2025-09-10 02:29:19.026251', 'step': 3249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.055531', 'step': 3249, 'epoch': 2} {'type': 'loss', 'content': 0.0033281215000897646, 'timestamp': '2025-09-10 02:29:19.057400', 'step': 3250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.086689', 'step': 3250, 'epoch': 2} {'type': 'loss', 'content': 0.004846022929996252, 'timestamp': '2025-09-10 02:29:19.088714', 'step': 3251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:19.117724', 'step': 3251, 'epoch': 2} {'type': 'loss', 'content': 0.004795127082616091, 'timestamp': '2025-09-10 02:29:19.141395', 'step': 3252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.170436', 'step': 3252, 'epoch': 2} {'type': 'loss', 'content': 0.023141562938690186, 'timestamp': '2025-09-10 02:29:19.172389', 'step': 3253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.201348', 'step': 3253, 'epoch': 2} {'type': 'loss', 'content': 0.005410187877714634, 'timestamp': '2025-09-10 02:29:19.203159', 'step': 3254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.231950', 'step': 3254, 'epoch': 2} {'type': 'loss', 'content': 0.0019504806259647012, 'timestamp': '2025-09-10 02:29:19.234013', 'step': 3255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.264410', 'step': 3255, 'epoch': 2} {'type': 'loss', 'content': 0.0038690518122166395, 'timestamp': '2025-09-10 02:29:19.287874', 'step': 3256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.316997', 'step': 3256, 'epoch': 2} {'type': 'loss', 'content': 0.0039766039699316025, 'timestamp': '2025-09-10 02:29:19.318871', 'step': 3257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:19.347807', 'step': 3257, 'epoch': 2} {'type': 'loss', 'content': 0.008529971353709698, 'timestamp': '2025-09-10 02:29:19.349776', 'step': 3258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.379128', 'step': 3258, 'epoch': 2} {'type': 'loss', 'content': 0.0007365807541646063, 'timestamp': '2025-09-10 02:29:19.381049', 'step': 3259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.409764', 'step': 3259, 'epoch': 2} {'type': 'loss', 'content': 0.006635564845055342, 'timestamp': '2025-09-10 02:29:19.433404', 'step': 3260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.462525', 'step': 3260, 'epoch': 2} {'type': 'loss', 'content': 0.019494740292429924, 'timestamp': '2025-09-10 02:29:19.464652', 'step': 3261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:19.493866', 'step': 3261, 'epoch': 2} {'type': 'loss', 'content': 0.007422385271638632, 'timestamp': '2025-09-10 02:29:19.495885', 'step': 3262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.524614', 'step': 3262, 'epoch': 2} {'type': 'loss', 'content': 0.016547175124287605, 'timestamp': '2025-09-10 02:29:19.526523', 'step': 3263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:19.555385', 'step': 3263, 'epoch': 2} {'type': 'loss', 'content': 0.0021990910172462463, 'timestamp': '2025-09-10 02:29:19.578769', 'step': 3264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:19.607839', 'step': 3264, 'epoch': 2} {'type': 'loss', 'content': 0.01675112545490265, 'timestamp': '2025-09-10 02:29:19.609665', 'step': 3265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.638344', 'step': 3265, 'epoch': 2} {'type': 'loss', 'content': 0.027797266840934753, 'timestamp': '2025-09-10 02:29:19.640283', 'step': 3266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.668865', 'step': 3266, 'epoch': 2} {'type': 'loss', 'content': 0.019333451986312866, 'timestamp': '2025-09-10 02:29:19.670665', 'step': 3267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:19.699287', 'step': 3267, 'epoch': 2} {'type': 'loss', 'content': 0.02450748346745968, 'timestamp': '2025-09-10 02:29:19.723794', 'step': 3268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.752733', 'step': 3268, 'epoch': 2} {'type': 'loss', 'content': 0.03439665958285332, 'timestamp': '2025-09-10 02:29:19.754467', 'step': 3269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.785703', 'step': 3269, 'epoch': 2} {'type': 'loss', 'content': 0.03972681984305382, 'timestamp': '2025-09-10 02:29:19.787744', 'step': 3270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.816911', 'step': 3270, 'epoch': 2} {'type': 'loss', 'content': 0.003959035966545343, 'timestamp': '2025-09-10 02:29:19.818959', 'step': 3271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.848244', 'step': 3271, 'epoch': 2} {'type': 'loss', 'content': 0.007591621018946171, 'timestamp': '2025-09-10 02:29:19.871710', 'step': 3272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.900934', 'step': 3272, 'epoch': 2} {'type': 'loss', 'content': 0.009627032093703747, 'timestamp': '2025-09-10 02:29:19.902951', 'step': 3273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.931807', 'step': 3273, 'epoch': 2} {'type': 'loss', 'content': 0.006072872783988714, 'timestamp': '2025-09-10 02:29:19.933602', 'step': 3274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:19.963636', 'step': 3274, 'epoch': 2} {'type': 'loss', 'content': 0.0012307605938985944, 'timestamp': '2025-09-10 02:29:19.965723', 'step': 3275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:19.994817', 'step': 3275, 'epoch': 2} {'type': 'loss', 'content': 0.010460141114890575, 'timestamp': '2025-09-10 02:29:20.020237', 'step': 3276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.049699', 'step': 3276, 'epoch': 2} {'type': 'loss', 'content': 0.009266098029911518, 'timestamp': '2025-09-10 02:29:20.051892', 'step': 3277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.080640', 'step': 3277, 'epoch': 2} {'type': 'loss', 'content': 0.001034657354466617, 'timestamp': '2025-09-10 02:29:20.082667', 'step': 3278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.111708', 'step': 3278, 'epoch': 2} {'type': 'loss', 'content': 0.00271918554790318, 'timestamp': '2025-09-10 02:29:20.113677', 'step': 3279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.142309', 'step': 3279, 'epoch': 2} {'type': 'loss', 'content': 0.0036298034247010946, 'timestamp': '2025-09-10 02:29:20.165832', 'step': 3280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:20.194624', 'step': 3280, 'epoch': 2} {'type': 'loss', 'content': 0.0019971595611423254, 'timestamp': '2025-09-10 02:29:20.196659', 'step': 3281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.225641', 'step': 3281, 'epoch': 2} {'type': 'loss', 'content': 0.0014646511990576982, 'timestamp': '2025-09-10 02:29:20.227621', 'step': 3282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.256695', 'step': 3282, 'epoch': 2} {'type': 'loss', 'content': 0.04680074006319046, 'timestamp': '2025-09-10 02:29:20.258589', 'step': 3283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.287414', 'step': 3283, 'epoch': 2} {'type': 'loss', 'content': 0.003970776218920946, 'timestamp': '2025-09-10 02:29:20.311023', 'step': 3284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.339930', 'step': 3284, 'epoch': 2} {'type': 'loss', 'content': 0.0027751787565648556, 'timestamp': '2025-09-10 02:29:20.341953', 'step': 3285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.370591', 'step': 3285, 'epoch': 2} {'type': 'loss', 'content': 0.005944137927144766, 'timestamp': '2025-09-10 02:29:20.372485', 'step': 3286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.401444', 'step': 3286, 'epoch': 2} {'type': 'loss', 'content': 0.0031202025711536407, 'timestamp': '2025-09-10 02:29:20.403237', 'step': 3287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.431908', 'step': 3287, 'epoch': 2} {'type': 'loss', 'content': 0.002764130476862192, 'timestamp': '2025-09-10 02:29:20.455458', 'step': 3288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:20.484395', 'step': 3288, 'epoch': 2} {'type': 'loss', 'content': 0.001558019663207233, 'timestamp': '2025-09-10 02:29:20.486441', 'step': 3289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.515587', 'step': 3289, 'epoch': 2} {'type': 'loss', 'content': 0.0013384289341047406, 'timestamp': '2025-09-10 02:29:20.517572', 'step': 3290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.546405', 'step': 3290, 'epoch': 2} {'type': 'loss', 'content': 0.017970601096749306, 'timestamp': '2025-09-10 02:29:20.548489', 'step': 3291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.577071', 'step': 3291, 'epoch': 2} {'type': 'loss', 'content': 0.0016688795294612646, 'timestamp': '2025-09-10 02:29:20.600611', 'step': 3292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:20.629800', 'step': 3292, 'epoch': 2} {'type': 'loss', 'content': 0.004329177085310221, 'timestamp': '2025-09-10 02:29:20.631806', 'step': 3293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.661082', 'step': 3293, 'epoch': 2} {'type': 'loss', 'content': 0.04710295423865318, 'timestamp': '2025-09-10 02:29:20.662935', 'step': 3294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.691674', 'step': 3294, 'epoch': 2} {'type': 'loss', 'content': 0.01943361759185791, 'timestamp': '2025-09-10 02:29:20.693671', 'step': 3295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:20.722401', 'step': 3295, 'epoch': 2} {'type': 'loss', 'content': 0.030896736308932304, 'timestamp': '2025-09-10 02:29:20.745977', 'step': 3296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.774635', 'step': 3296, 'epoch': 2} {'type': 'loss', 'content': 0.009174938313663006, 'timestamp': '2025-09-10 02:29:20.776608', 'step': 3297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.805590', 'step': 3297, 'epoch': 2} {'type': 'loss', 'content': 0.0020134812220931053, 'timestamp': '2025-09-10 02:29:20.807513', 'step': 3298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:20.836466', 'step': 3298, 'epoch': 2} {'type': 'loss', 'content': 0.007612983230501413, 'timestamp': '2025-09-10 02:29:20.838503', 'step': 3299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.876634', 'step': 3299, 'epoch': 2} {'type': 'loss', 'content': 0.004460458178073168, 'timestamp': '2025-09-10 02:29:20.899956', 'step': 3300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.930738', 'step': 3300, 'epoch': 2} {'type': 'loss', 'content': 0.011855359189212322, 'timestamp': '2025-09-10 02:29:20.932666', 'step': 3301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.961791', 'step': 3301, 'epoch': 2} {'type': 'loss', 'content': 0.0030626137740910053, 'timestamp': '2025-09-10 02:29:20.963783', 'step': 3302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:20.993081', 'step': 3302, 'epoch': 2} {'type': 'loss', 'content': 0.0014373933663591743, 'timestamp': '2025-09-10 02:29:20.994947', 'step': 3303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.028196', 'step': 3303, 'epoch': 2} {'type': 'loss', 'content': 0.0031409133225679398, 'timestamp': '2025-09-10 02:29:21.051828', 'step': 3304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.081688', 'step': 3304, 'epoch': 2} {'type': 'loss', 'content': 0.025139452889561653, 'timestamp': '2025-09-10 02:29:21.083765', 'step': 3305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.113113', 'step': 3305, 'epoch': 2} {'type': 'loss', 'content': 0.00456382567062974, 'timestamp': '2025-09-10 02:29:21.115199', 'step': 3306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.145594', 'step': 3306, 'epoch': 2} {'type': 'loss', 'content': 0.0022305515594780445, 'timestamp': '2025-09-10 02:29:21.147349', 'step': 3307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.175544', 'step': 3307, 'epoch': 2} {'type': 'loss', 'content': 0.012439766898751259, 'timestamp': '2025-09-10 02:29:21.199613', 'step': 3308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:21.229185', 'step': 3308, 'epoch': 2} {'type': 'loss', 'content': 0.05707881227135658, 'timestamp': '2025-09-10 02:29:21.230964', 'step': 3309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.259353', 'step': 3309, 'epoch': 2} {'type': 'loss', 'content': 0.0033739774953573942, 'timestamp': '2025-09-10 02:29:21.261142', 'step': 3310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.289987', 'step': 3310, 'epoch': 2} {'type': 'loss', 'content': 0.00657115550711751, 'timestamp': '2025-09-10 02:29:21.291900', 'step': 3311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.319950', 'step': 3311, 'epoch': 2} {'type': 'loss', 'content': 0.0016374706756323576, 'timestamp': '2025-09-10 02:29:21.343304', 'step': 3312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:21.373855', 'step': 3312, 'epoch': 2} {'type': 'loss', 'content': 0.05689441040158272, 'timestamp': '2025-09-10 02:29:21.375788', 'step': 3313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.404298', 'step': 3313, 'epoch': 2} {'type': 'loss', 'content': 0.00048739396152086556, 'timestamp': '2025-09-10 02:29:21.408079', 'step': 3314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.440032', 'step': 3314, 'epoch': 2} {'type': 'loss', 'content': 0.0032219900749623775, 'timestamp': '2025-09-10 02:29:21.441891', 'step': 3315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.470535', 'step': 3315, 'epoch': 2} {'type': 'loss', 'content': 0.018400685861706734, 'timestamp': '2025-09-10 02:29:21.494186', 'step': 3316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.522898', 'step': 3316, 'epoch': 2} {'type': 'loss', 'content': 0.008098559454083443, 'timestamp': '2025-09-10 02:29:21.524809', 'step': 3317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.553205', 'step': 3317, 'epoch': 2} {'type': 'loss', 'content': 0.00045300991041585803, 'timestamp': '2025-09-10 02:29:21.554975', 'step': 3318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.583810', 'step': 3318, 'epoch': 2} {'type': 'loss', 'content': 0.0014719769824296236, 'timestamp': '2025-09-10 02:29:21.586246', 'step': 3319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.615211', 'step': 3319, 'epoch': 2} {'type': 'loss', 'content': 0.0005743346991948783, 'timestamp': '2025-09-10 02:29:21.638931', 'step': 3320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.668418', 'step': 3320, 'epoch': 2} {'type': 'loss', 'content': 0.001873001572676003, 'timestamp': '2025-09-10 02:29:21.670413', 'step': 3321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.701958', 'step': 3321, 'epoch': 2} {'type': 'loss', 'content': 0.004645978100597858, 'timestamp': '2025-09-10 02:29:21.707690', 'step': 3322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.736701', 'step': 3322, 'epoch': 2} {'type': 'loss', 'content': 0.006194022949784994, 'timestamp': '2025-09-10 02:29:21.738440', 'step': 3323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.767046', 'step': 3323, 'epoch': 2} {'type': 'loss', 'content': 0.06860353797674179, 'timestamp': '2025-09-10 02:29:21.793182', 'step': 3324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.822251', 'step': 3324, 'epoch': 2} {'type': 'loss', 'content': 0.05633767321705818, 'timestamp': '2025-09-10 02:29:21.824103', 'step': 3325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.852370', 'step': 3325, 'epoch': 2} {'type': 'loss', 'content': 0.0016861387994140387, 'timestamp': '2025-09-10 02:29:21.854482', 'step': 3326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:29:21.883306', 'step': 3326, 'epoch': 2} {'type': 'loss', 'content': 0.023309985175728798, 'timestamp': '2025-09-10 02:29:21.885907', 'step': 3327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.916141', 'step': 3327, 'epoch': 2} {'type': 'loss', 'content': 0.001437532133422792, 'timestamp': '2025-09-10 02:29:21.939522', 'step': 3328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.968639', 'step': 3328, 'epoch': 2} {'type': 'loss', 'content': 0.004248501267284155, 'timestamp': '2025-09-10 02:29:21.970328', 'step': 3329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:21.998691', 'step': 3329, 'epoch': 2} {'type': 'loss', 'content': 0.059723686426877975, 'timestamp': '2025-09-10 02:29:22.000595', 'step': 3330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.029082', 'step': 3330, 'epoch': 2} {'type': 'loss', 'content': 0.08785872161388397, 'timestamp': '2025-09-10 02:29:22.030873', 'step': 3331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.061330', 'step': 3331, 'epoch': 2} {'type': 'loss', 'content': 0.011735972948372364, 'timestamp': '2025-09-10 02:29:22.093551', 'step': 3332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.145068', 'step': 3332, 'epoch': 2} {'type': 'loss', 'content': 0.001344985910691321, 'timestamp': '2025-09-10 02:29:22.151334', 'step': 3333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:22.185770', 'step': 3333, 'epoch': 2} {'type': 'loss', 'content': 0.042980439960956573, 'timestamp': '2025-09-10 02:29:22.187798', 'step': 3334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.216526', 'step': 3334, 'epoch': 2} {'type': 'loss', 'content': 0.06018718704581261, 'timestamp': '2025-09-10 02:29:22.218495', 'step': 3335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.249027', 'step': 3335, 'epoch': 2} {'type': 'loss', 'content': 0.016707923263311386, 'timestamp': '2025-09-10 02:29:22.272396', 'step': 3336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.301550', 'step': 3336, 'epoch': 2} {'type': 'loss', 'content': 0.0011664006160572171, 'timestamp': '2025-09-10 02:29:22.303260', 'step': 3337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.332099', 'step': 3337, 'epoch': 2} {'type': 'loss', 'content': 0.000589015893638134, 'timestamp': '2025-09-10 02:29:22.336015', 'step': 3338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.364893', 'step': 3338, 'epoch': 2} {'type': 'loss', 'content': 0.0005039245006628335, 'timestamp': '2025-09-10 02:29:22.366881', 'step': 3339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.395372', 'step': 3339, 'epoch': 2} {'type': 'loss', 'content': 0.0020321940537542105, 'timestamp': '2025-09-10 02:29:22.418714', 'step': 3340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.446866', 'step': 3340, 'epoch': 2} {'type': 'loss', 'content': 0.0007291255169548094, 'timestamp': '2025-09-10 02:29:22.450296', 'step': 3341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.479103', 'step': 3341, 'epoch': 2} {'type': 'loss', 'content': 0.02235017530620098, 'timestamp': '2025-09-10 02:29:22.481093', 'step': 3342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.509191', 'step': 3342, 'epoch': 2} {'type': 'loss', 'content': 0.003876918461173773, 'timestamp': '2025-09-10 02:29:22.510993', 'step': 3343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:22.539293', 'step': 3343, 'epoch': 2} {'type': 'loss', 'content': 0.001926369033753872, 'timestamp': '2025-09-10 02:29:22.562946', 'step': 3344, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:29:24.434568', 'step': 3344, 'epoch': 2} {'type': 'pplx', 'content': 3018983.5664545535, 'timestamp': '2025-09-10 02:29:24.436472', 'step': 3344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.464549', 'step': 3344, 'epoch': 2} {'type': 'loss', 'content': 0.028784185647964478, 'timestamp': '2025-09-10 02:29:24.466555', 'step': 3345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.495671', 'step': 3345, 'epoch': 2} {'type': 'loss', 'content': 0.0049285888671875, 'timestamp': '2025-09-10 02:29:24.497669', 'step': 3346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:24.527189', 'step': 3346, 'epoch': 2} {'type': 'loss', 'content': 0.004780747927725315, 'timestamp': '2025-09-10 02:29:24.528928', 'step': 3347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.557732', 'step': 3347, 'epoch': 2} {'type': 'loss', 'content': 0.006687031593173742, 'timestamp': '2025-09-10 02:29:24.581511', 'step': 3348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.611395', 'step': 3348, 'epoch': 2} {'type': 'loss', 'content': 0.008859639056026936, 'timestamp': '2025-09-10 02:29:24.613445', 'step': 3349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.643085', 'step': 3349, 'epoch': 2} {'type': 'loss', 'content': 0.002748832106590271, 'timestamp': '2025-09-10 02:29:24.644900', 'step': 3350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:24.673708', 'step': 3350, 'epoch': 2} {'type': 'loss', 'content': 0.0011055845534428954, 'timestamp': '2025-09-10 02:29:24.675564', 'step': 3351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:24.704312', 'step': 3351, 'epoch': 2} {'type': 'loss', 'content': 0.022624919191002846, 'timestamp': '2025-09-10 02:29:24.727930', 'step': 3352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.757199', 'step': 3352, 'epoch': 2} {'type': 'loss', 'content': 0.00422668969258666, 'timestamp': '2025-09-10 02:29:24.758983', 'step': 3353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.787702', 'step': 3353, 'epoch': 2} {'type': 'loss', 'content': 0.03384550288319588, 'timestamp': '2025-09-10 02:29:24.789562', 'step': 3354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.818420', 'step': 3354, 'epoch': 2} {'type': 'loss', 'content': 0.00036209248355589807, 'timestamp': '2025-09-10 02:29:24.820458', 'step': 3355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.849795', 'step': 3355, 'epoch': 2} {'type': 'loss', 'content': 0.004498614929616451, 'timestamp': '2025-09-10 02:29:24.873476', 'step': 3356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.903182', 'step': 3356, 'epoch': 2} {'type': 'loss', 'content': 0.05434301495552063, 'timestamp': '2025-09-10 02:29:24.905219', 'step': 3357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.934709', 'step': 3357, 'epoch': 2} {'type': 'loss', 'content': 0.0021592320408672094, 'timestamp': '2025-09-10 02:29:24.936839', 'step': 3358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:24.966005', 'step': 3358, 'epoch': 2} {'type': 'loss', 'content': 0.025666924193501472, 'timestamp': '2025-09-10 02:29:24.967846', 'step': 3359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:24.996816', 'step': 3359, 'epoch': 2} {'type': 'loss', 'content': 0.004111967049539089, 'timestamp': '2025-09-10 02:29:25.020272', 'step': 3360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.049379', 'step': 3360, 'epoch': 2} {'type': 'loss', 'content': 0.016234159469604492, 'timestamp': '2025-09-10 02:29:25.051309', 'step': 3361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.079988', 'step': 3361, 'epoch': 2} {'type': 'loss', 'content': 0.00322941062040627, 'timestamp': '2025-09-10 02:29:25.081983', 'step': 3362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.110693', 'step': 3362, 'epoch': 2} {'type': 'loss', 'content': 0.009945042431354523, 'timestamp': '2025-09-10 02:29:25.112523', 'step': 3363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.141320', 'step': 3363, 'epoch': 2} {'type': 'loss', 'content': 0.01008332334458828, 'timestamp': '2025-09-10 02:29:25.164925', 'step': 3364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.194028', 'step': 3364, 'epoch': 2} {'type': 'loss', 'content': 0.03316834196448326, 'timestamp': '2025-09-10 02:29:25.195770', 'step': 3365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.224273', 'step': 3365, 'epoch': 2} {'type': 'loss', 'content': 0.031795572489500046, 'timestamp': '2025-09-10 02:29:25.226013', 'step': 3366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.254808', 'step': 3366, 'epoch': 2} {'type': 'loss', 'content': 0.001609918661415577, 'timestamp': '2025-09-10 02:29:25.256805', 'step': 3367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.285413', 'step': 3367, 'epoch': 2} {'type': 'loss', 'content': 0.04161988943815231, 'timestamp': '2025-09-10 02:29:25.308826', 'step': 3368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.337829', 'step': 3368, 'epoch': 2} {'type': 'loss', 'content': 0.006874702405184507, 'timestamp': '2025-09-10 02:29:25.340013', 'step': 3369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.368830', 'step': 3369, 'epoch': 2} {'type': 'loss', 'content': 0.004394261632114649, 'timestamp': '2025-09-10 02:29:25.370791', 'step': 3370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.399231', 'step': 3370, 'epoch': 2} {'type': 'loss', 'content': 0.0316348522901535, 'timestamp': '2025-09-10 02:29:25.401226', 'step': 3371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.430278', 'step': 3371, 'epoch': 2} {'type': 'loss', 'content': 0.017396869137883186, 'timestamp': '2025-09-10 02:29:25.453715', 'step': 3372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.483241', 'step': 3372, 'epoch': 2} {'type': 'loss', 'content': 0.03450683131814003, 'timestamp': '2025-09-10 02:29:25.485245', 'step': 3373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.514628', 'step': 3373, 'epoch': 2} {'type': 'loss', 'content': 0.03623649477958679, 'timestamp': '2025-09-10 02:29:25.517827', 'step': 3374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.549368', 'step': 3374, 'epoch': 2} {'type': 'loss', 'content': 0.010904884897172451, 'timestamp': '2025-09-10 02:29:25.551104', 'step': 3375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.579987', 'step': 3375, 'epoch': 2} {'type': 'loss', 'content': 0.02146339975297451, 'timestamp': '2025-09-10 02:29:25.603634', 'step': 3376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.632537', 'step': 3376, 'epoch': 2} {'type': 'loss', 'content': 0.005433398764580488, 'timestamp': '2025-09-10 02:29:25.634841', 'step': 3377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.663655', 'step': 3377, 'epoch': 2} {'type': 'loss', 'content': 0.0025793889071792364, 'timestamp': '2025-09-10 02:29:25.665886', 'step': 3378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.694966', 'step': 3378, 'epoch': 2} {'type': 'loss', 'content': 0.009922388009727001, 'timestamp': '2025-09-10 02:29:25.697144', 'step': 3379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.726561', 'step': 3379, 'epoch': 2} {'type': 'loss', 'content': 0.007967361249029636, 'timestamp': '2025-09-10 02:29:25.749978', 'step': 3380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.778805', 'step': 3380, 'epoch': 2} {'type': 'loss', 'content': 0.0033568316139280796, 'timestamp': '2025-09-10 02:29:25.780780', 'step': 3381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.809391', 'step': 3381, 'epoch': 2} {'type': 'loss', 'content': 0.001999011030420661, 'timestamp': '2025-09-10 02:29:25.811136', 'step': 3382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:25.839904', 'step': 3382, 'epoch': 2} {'type': 'loss', 'content': 0.013739910908043385, 'timestamp': '2025-09-10 02:29:25.841594', 'step': 3383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:25.870836', 'step': 3383, 'epoch': 2} {'type': 'loss', 'content': 0.011606521904468536, 'timestamp': '2025-09-10 02:29:25.894285', 'step': 3384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:25.923217', 'step': 3384, 'epoch': 2} {'type': 'loss', 'content': 0.00559838255867362, 'timestamp': '2025-09-10 02:29:25.925016', 'step': 3385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.953905', 'step': 3385, 'epoch': 2} {'type': 'loss', 'content': 0.0021412342321127653, 'timestamp': '2025-09-10 02:29:25.955850', 'step': 3386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:25.984274', 'step': 3386, 'epoch': 2} {'type': 'loss', 'content': 0.052755001932382584, 'timestamp': '2025-09-10 02:29:25.986171', 'step': 3387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.014878', 'step': 3387, 'epoch': 2} {'type': 'loss', 'content': 0.009612246416509151, 'timestamp': '2025-09-10 02:29:26.038377', 'step': 3388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.067409', 'step': 3388, 'epoch': 2} {'type': 'loss', 'content': 0.0032427285332232714, 'timestamp': '2025-09-10 02:29:26.069349', 'step': 3389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.098329', 'step': 3389, 'epoch': 2} {'type': 'loss', 'content': 0.0020761454943567514, 'timestamp': '2025-09-10 02:29:26.100297', 'step': 3390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.129374', 'step': 3390, 'epoch': 2} {'type': 'loss', 'content': 0.018983175978064537, 'timestamp': '2025-09-10 02:29:26.131126', 'step': 3391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.159621', 'step': 3391, 'epoch': 2} {'type': 'loss', 'content': 0.007009588647633791, 'timestamp': '2025-09-10 02:29:26.183076', 'step': 3392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.211698', 'step': 3392, 'epoch': 2} {'type': 'loss', 'content': 0.021478259935975075, 'timestamp': '2025-09-10 02:29:26.213656', 'step': 3393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.242584', 'step': 3393, 'epoch': 2} {'type': 'loss', 'content': 0.001726645277813077, 'timestamp': '2025-09-10 02:29:26.244408', 'step': 3394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.272996', 'step': 3394, 'epoch': 2} {'type': 'loss', 'content': 0.003632990876212716, 'timestamp': '2025-09-10 02:29:26.274833', 'step': 3395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.303325', 'step': 3395, 'epoch': 2} {'type': 'loss', 'content': 0.011844513937830925, 'timestamp': '2025-09-10 02:29:26.326613', 'step': 3396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:26.355623', 'step': 3396, 'epoch': 2} {'type': 'loss', 'content': 0.004450249020010233, 'timestamp': '2025-09-10 02:29:26.357462', 'step': 3397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.385908', 'step': 3397, 'epoch': 2} {'type': 'loss', 'content': 0.003970394842326641, 'timestamp': '2025-09-10 02:29:26.387664', 'step': 3398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.416386', 'step': 3398, 'epoch': 2} {'type': 'loss', 'content': 0.0025803286116570234, 'timestamp': '2025-09-10 02:29:26.418209', 'step': 3399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.446717', 'step': 3399, 'epoch': 2} {'type': 'loss', 'content': 0.014798497781157494, 'timestamp': '2025-09-10 02:29:26.470274', 'step': 3400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.499963', 'step': 3400, 'epoch': 2} {'type': 'loss', 'content': 0.03386901691555977, 'timestamp': '2025-09-10 02:29:26.502185', 'step': 3401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.530730', 'step': 3401, 'epoch': 2} {'type': 'loss', 'content': 0.0016055998858064413, 'timestamp': '2025-09-10 02:29:26.532549', 'step': 3402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:26.561165', 'step': 3402, 'epoch': 2} {'type': 'loss', 'content': 0.016830632463097572, 'timestamp': '2025-09-10 02:29:26.563043', 'step': 3403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.591918', 'step': 3403, 'epoch': 2} {'type': 'loss', 'content': 0.003983858972787857, 'timestamp': '2025-09-10 02:29:26.615365', 'step': 3404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.644936', 'step': 3404, 'epoch': 2} {'type': 'loss', 'content': 0.0032172624487429857, 'timestamp': '2025-09-10 02:29:26.646793', 'step': 3405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.675413', 'step': 3405, 'epoch': 2} {'type': 'loss', 'content': 0.003136297222226858, 'timestamp': '2025-09-10 02:29:26.677273', 'step': 3406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.706878', 'step': 3406, 'epoch': 2} {'type': 'loss', 'content': 0.002121201017871499, 'timestamp': '2025-09-10 02:29:26.708762', 'step': 3407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:26.737565', 'step': 3407, 'epoch': 2} {'type': 'loss', 'content': 0.001867905491963029, 'timestamp': '2025-09-10 02:29:26.761068', 'step': 3408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.790238', 'step': 3408, 'epoch': 2} {'type': 'loss', 'content': 0.03702418878674507, 'timestamp': '2025-09-10 02:29:26.792230', 'step': 3409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.821867', 'step': 3409, 'epoch': 2} {'type': 'loss', 'content': 0.03472406044602394, 'timestamp': '2025-09-10 02:29:26.823951', 'step': 3410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.852952', 'step': 3410, 'epoch': 2} {'type': 'loss', 'content': 0.002561768749728799, 'timestamp': '2025-09-10 02:29:26.855021', 'step': 3411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.883989', 'step': 3411, 'epoch': 2} {'type': 'loss', 'content': 0.06792227923870087, 'timestamp': '2025-09-10 02:29:26.907367', 'step': 3412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.936732', 'step': 3412, 'epoch': 2} {'type': 'loss', 'content': 0.014536969363689423, 'timestamp': '2025-09-10 02:29:26.938496', 'step': 3413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:26.967631', 'step': 3413, 'epoch': 2} {'type': 'loss', 'content': 0.016044307500123978, 'timestamp': '2025-09-10 02:29:26.969439', 'step': 3414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:26.998421', 'step': 3414, 'epoch': 2} {'type': 'loss', 'content': 0.011875098571181297, 'timestamp': '2025-09-10 02:29:27.000541', 'step': 3415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.029503', 'step': 3415, 'epoch': 2} {'type': 'loss', 'content': 0.019096845760941505, 'timestamp': '2025-09-10 02:29:27.053076', 'step': 3416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.081695', 'step': 3416, 'epoch': 2} {'type': 'loss', 'content': 0.0050103445537388325, 'timestamp': '2025-09-10 02:29:27.083502', 'step': 3417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.112350', 'step': 3417, 'epoch': 2} {'type': 'loss', 'content': 0.008464130572974682, 'timestamp': '2025-09-10 02:29:27.114344', 'step': 3418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.143577', 'step': 3418, 'epoch': 2} {'type': 'loss', 'content': 0.010651574470102787, 'timestamp': '2025-09-10 02:29:27.145648', 'step': 3419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.174391', 'step': 3419, 'epoch': 2} {'type': 'loss', 'content': 0.003018937772139907, 'timestamp': '2025-09-10 02:29:27.197966', 'step': 3420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.227185', 'step': 3420, 'epoch': 2} {'type': 'loss', 'content': 0.01083854865282774, 'timestamp': '2025-09-10 02:29:27.229090', 'step': 3421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.257941', 'step': 3421, 'epoch': 2} {'type': 'loss', 'content': 0.003781322157010436, 'timestamp': '2025-09-10 02:29:27.259789', 'step': 3422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.289412', 'step': 3422, 'epoch': 2} {'type': 'loss', 'content': 0.002141769276931882, 'timestamp': '2025-09-10 02:29:27.291203', 'step': 3423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.320929', 'step': 3423, 'epoch': 2} {'type': 'loss', 'content': 0.0017488569719716907, 'timestamp': '2025-09-10 02:29:27.344572', 'step': 3424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.374392', 'step': 3424, 'epoch': 2} {'type': 'loss', 'content': 0.003756160382181406, 'timestamp': '2025-09-10 02:29:27.376298', 'step': 3425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.405695', 'step': 3425, 'epoch': 2} {'type': 'loss', 'content': 0.0471893735229969, 'timestamp': '2025-09-10 02:29:27.407774', 'step': 3426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:27.436568', 'step': 3426, 'epoch': 2} {'type': 'loss', 'content': 0.001391625963151455, 'timestamp': '2025-09-10 02:29:27.438477', 'step': 3427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:27.467392', 'step': 3427, 'epoch': 2} {'type': 'loss', 'content': 0.0011366669787093997, 'timestamp': '2025-09-10 02:29:27.490924', 'step': 3428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.519508', 'step': 3428, 'epoch': 2} {'type': 'loss', 'content': 0.029718205332756042, 'timestamp': '2025-09-10 02:29:27.521624', 'step': 3429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.550042', 'step': 3429, 'epoch': 2} {'type': 'loss', 'content': 0.0040368977934122086, 'timestamp': '2025-09-10 02:29:27.552078', 'step': 3430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.580801', 'step': 3430, 'epoch': 2} {'type': 'loss', 'content': 0.029250582680106163, 'timestamp': '2025-09-10 02:29:27.582681', 'step': 3431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:27.611238', 'step': 3431, 'epoch': 2} {'type': 'loss', 'content': 0.001969092758372426, 'timestamp': '2025-09-10 02:29:27.636732', 'step': 3432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.665333', 'step': 3432, 'epoch': 2} {'type': 'loss', 'content': 0.019174734130501747, 'timestamp': '2025-09-10 02:29:27.671073', 'step': 3433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.700002', 'step': 3433, 'epoch': 2} {'type': 'loss', 'content': 0.002094629453495145, 'timestamp': '2025-09-10 02:29:27.701941', 'step': 3434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.744666', 'step': 3434, 'epoch': 2} {'type': 'loss', 'content': 0.010229520499706268, 'timestamp': '2025-09-10 02:29:27.746496', 'step': 3435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:27.775354', 'step': 3435, 'epoch': 2} {'type': 'loss', 'content': 0.00620575575158, 'timestamp': '2025-09-10 02:29:27.799553', 'step': 3436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.828802', 'step': 3436, 'epoch': 2} {'type': 'loss', 'content': 0.003741190303117037, 'timestamp': '2025-09-10 02:29:27.830670', 'step': 3437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.859548', 'step': 3437, 'epoch': 2} {'type': 'loss', 'content': 0.0019687057938426733, 'timestamp': '2025-09-10 02:29:27.861390', 'step': 3438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.890290', 'step': 3438, 'epoch': 2} {'type': 'loss', 'content': 0.002562493784353137, 'timestamp': '2025-09-10 02:29:27.893947', 'step': 3439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.922961', 'step': 3439, 'epoch': 2} {'type': 'loss', 'content': 0.002744677709415555, 'timestamp': '2025-09-10 02:29:27.946368', 'step': 3440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:27.975027', 'step': 3440, 'epoch': 2} {'type': 'loss', 'content': 0.029536891728639603, 'timestamp': '2025-09-10 02:29:27.976911', 'step': 3441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.005981', 'step': 3441, 'epoch': 2} {'type': 'loss', 'content': 0.0070204222574830055, 'timestamp': '2025-09-10 02:29:28.007875', 'step': 3442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.037034', 'step': 3442, 'epoch': 2} {'type': 'loss', 'content': 0.0031619048677384853, 'timestamp': '2025-09-10 02:29:28.038971', 'step': 3443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.067874', 'step': 3443, 'epoch': 2} {'type': 'loss', 'content': 0.012876972556114197, 'timestamp': '2025-09-10 02:29:28.094598', 'step': 3444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.123625', 'step': 3444, 'epoch': 2} {'type': 'loss', 'content': 0.03683038428425789, 'timestamp': '2025-09-10 02:29:28.126352', 'step': 3445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.161716', 'step': 3445, 'epoch': 2} {'type': 'loss', 'content': 0.009324649348855019, 'timestamp': '2025-09-10 02:29:28.163583', 'step': 3446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.192594', 'step': 3446, 'epoch': 2} {'type': 'loss', 'content': 0.023574920371174812, 'timestamp': '2025-09-10 02:29:28.194532', 'step': 3447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.223050', 'step': 3447, 'epoch': 2} {'type': 'loss', 'content': 0.0015393022913485765, 'timestamp': '2025-09-10 02:29:28.246362', 'step': 3448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.275267', 'step': 3448, 'epoch': 2} {'type': 'loss', 'content': 0.006074630655348301, 'timestamp': '2025-09-10 02:29:28.277214', 'step': 3449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.306017', 'step': 3449, 'epoch': 2} {'type': 'loss', 'content': 0.0035448498092591763, 'timestamp': '2025-09-10 02:29:28.308477', 'step': 3450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.338933', 'step': 3450, 'epoch': 2} {'type': 'loss', 'content': 0.009274295531213284, 'timestamp': '2025-09-10 02:29:28.340891', 'step': 3451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.369529', 'step': 3451, 'epoch': 2} {'type': 'loss', 'content': 0.0013662822311744094, 'timestamp': '2025-09-10 02:29:28.393039', 'step': 3452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:28.422352', 'step': 3452, 'epoch': 2} {'type': 'loss', 'content': 0.049186598509550095, 'timestamp': '2025-09-10 02:29:28.426418', 'step': 3453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.458183', 'step': 3453, 'epoch': 2} {'type': 'loss', 'content': 0.0017654149560257792, 'timestamp': '2025-09-10 02:29:28.460078', 'step': 3454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.489029', 'step': 3454, 'epoch': 2} {'type': 'loss', 'content': 0.0012628829572349787, 'timestamp': '2025-09-10 02:29:28.490998', 'step': 3455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:28.520455', 'step': 3455, 'epoch': 2} {'type': 'loss', 'content': 0.0014435934135690331, 'timestamp': '2025-09-10 02:29:28.545666', 'step': 3456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.574820', 'step': 3456, 'epoch': 2} {'type': 'loss', 'content': 0.009639963507652283, 'timestamp': '2025-09-10 02:29:28.576435', 'step': 3457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.605398', 'step': 3457, 'epoch': 2} {'type': 'loss', 'content': 0.013706199824810028, 'timestamp': '2025-09-10 02:29:28.607646', 'step': 3458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.636564', 'step': 3458, 'epoch': 2} {'type': 'loss', 'content': 0.02213672362267971, 'timestamp': '2025-09-10 02:29:28.638596', 'step': 3459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.667385', 'step': 3459, 'epoch': 2} {'type': 'loss', 'content': 0.004982142709195614, 'timestamp': '2025-09-10 02:29:28.690824', 'step': 3460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.719813', 'step': 3460, 'epoch': 2} {'type': 'loss', 'content': 0.022070636972784996, 'timestamp': '2025-09-10 02:29:28.721646', 'step': 3461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.750575', 'step': 3461, 'epoch': 2} {'type': 'loss', 'content': 0.004302029497921467, 'timestamp': '2025-09-10 02:29:28.752666', 'step': 3462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:28.781357', 'step': 3462, 'epoch': 2} {'type': 'loss', 'content': 0.006409894675016403, 'timestamp': '2025-09-10 02:29:28.783335', 'step': 3463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.812156', 'step': 3463, 'epoch': 2} {'type': 'loss', 'content': 0.01828247867524624, 'timestamp': '2025-09-10 02:29:28.835624', 'step': 3464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.864026', 'step': 3464, 'epoch': 2} {'type': 'loss', 'content': 0.0010449145920574665, 'timestamp': '2025-09-10 02:29:28.865961', 'step': 3465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.894868', 'step': 3465, 'epoch': 2} {'type': 'loss', 'content': 0.0005158040439710021, 'timestamp': '2025-09-10 02:29:28.896894', 'step': 3466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.925581', 'step': 3466, 'epoch': 2} {'type': 'loss', 'content': 0.0013656012015417218, 'timestamp': '2025-09-10 02:29:28.927576', 'step': 3467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:28.955832', 'step': 3467, 'epoch': 2} {'type': 'loss', 'content': 0.008575326763093472, 'timestamp': '2025-09-10 02:29:28.979407', 'step': 3468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.008062', 'step': 3468, 'epoch': 2} {'type': 'loss', 'content': 0.0006952978437766433, 'timestamp': '2025-09-10 02:29:29.010079', 'step': 3469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.038443', 'step': 3469, 'epoch': 2} {'type': 'loss', 'content': 0.0007703721639700234, 'timestamp': '2025-09-10 02:29:29.040388', 'step': 3470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.069135', 'step': 3470, 'epoch': 2} {'type': 'loss', 'content': 0.02380296401679516, 'timestamp': '2025-09-10 02:29:29.071078', 'step': 3471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.099795', 'step': 3471, 'epoch': 2} {'type': 'loss', 'content': 0.0008592799422331154, 'timestamp': '2025-09-10 02:29:29.123363', 'step': 3472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.152575', 'step': 3472, 'epoch': 2} {'type': 'loss', 'content': 0.0031577313784509897, 'timestamp': '2025-09-10 02:29:29.154603', 'step': 3473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:29.184758', 'step': 3473, 'epoch': 2} {'type': 'loss', 'content': 0.0005181062733754516, 'timestamp': '2025-09-10 02:29:29.186931', 'step': 3474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.215732', 'step': 3474, 'epoch': 2} {'type': 'loss', 'content': 0.003346957266330719, 'timestamp': '2025-09-10 02:29:29.217573', 'step': 3475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.246181', 'step': 3475, 'epoch': 2} {'type': 'loss', 'content': 0.045351382344961166, 'timestamp': '2025-09-10 02:29:29.269556', 'step': 3476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.298574', 'step': 3476, 'epoch': 2} {'type': 'loss', 'content': 0.008699034340679646, 'timestamp': '2025-09-10 02:29:29.300537', 'step': 3477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:29.329406', 'step': 3477, 'epoch': 2} {'type': 'loss', 'content': 0.021816948428750038, 'timestamp': '2025-09-10 02:29:29.331524', 'step': 3478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.360328', 'step': 3478, 'epoch': 2} {'type': 'loss', 'content': 0.005231685936450958, 'timestamp': '2025-09-10 02:29:29.362178', 'step': 3479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.391352', 'step': 3479, 'epoch': 2} {'type': 'loss', 'content': 0.0012015464017167687, 'timestamp': '2025-09-10 02:29:29.414803', 'step': 3480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.443695', 'step': 3480, 'epoch': 2} {'type': 'loss', 'content': 0.00045789359137415886, 'timestamp': '2025-09-10 02:29:29.445627', 'step': 3481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.474691', 'step': 3481, 'epoch': 2} {'type': 'loss', 'content': 0.0006896085687913001, 'timestamp': '2025-09-10 02:29:29.476601', 'step': 3482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.505179', 'step': 3482, 'epoch': 2} {'type': 'loss', 'content': 0.0012523357290774584, 'timestamp': '2025-09-10 02:29:29.507093', 'step': 3483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.535753', 'step': 3483, 'epoch': 2} {'type': 'loss', 'content': 0.0377359464764595, 'timestamp': '2025-09-10 02:29:29.559124', 'step': 3484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.588202', 'step': 3484, 'epoch': 2} {'type': 'loss', 'content': 0.006449830252677202, 'timestamp': '2025-09-10 02:29:29.590313', 'step': 3485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.619051', 'step': 3485, 'epoch': 2} {'type': 'loss', 'content': 0.001721881446428597, 'timestamp': '2025-09-10 02:29:29.621109', 'step': 3486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:29.649769', 'step': 3486, 'epoch': 2} {'type': 'loss', 'content': 0.043385621160268784, 'timestamp': '2025-09-10 02:29:29.651931', 'step': 3487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.680690', 'step': 3487, 'epoch': 2} {'type': 'loss', 'content': 0.00024432101054117084, 'timestamp': '2025-09-10 02:29:29.704387', 'step': 3488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.733473', 'step': 3488, 'epoch': 2} {'type': 'loss', 'content': 0.032618287950754166, 'timestamp': '2025-09-10 02:29:29.735503', 'step': 3489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:29.764177', 'step': 3489, 'epoch': 2} {'type': 'loss', 'content': 0.025582995265722275, 'timestamp': '2025-09-10 02:29:29.766019', 'step': 3490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.794705', 'step': 3490, 'epoch': 2} {'type': 'loss', 'content': 0.03573060408234596, 'timestamp': '2025-09-10 02:29:29.796596', 'step': 3491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.825257', 'step': 3491, 'epoch': 2} {'type': 'loss', 'content': 0.0006821600836701691, 'timestamp': '2025-09-10 02:29:29.850923', 'step': 3492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.883501', 'step': 3492, 'epoch': 2} {'type': 'loss', 'content': 0.000552928657270968, 'timestamp': '2025-09-10 02:29:29.885544', 'step': 3493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.918156', 'step': 3493, 'epoch': 2} {'type': 'loss', 'content': 0.037706244736909866, 'timestamp': '2025-09-10 02:29:29.926313', 'step': 3494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.955427', 'step': 3494, 'epoch': 2} {'type': 'loss', 'content': 0.0017515952931717038, 'timestamp': '2025-09-10 02:29:29.957296', 'step': 3495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:29.986401', 'step': 3495, 'epoch': 2} {'type': 'loss', 'content': 0.0004756085982080549, 'timestamp': '2025-09-10 02:29:30.012731', 'step': 3496, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:29:32.030591', 'step': 3496, 'epoch': 2} {'type': 'pplx', 'content': 2477887.027473765, 'timestamp': '2025-09-10 02:29:32.032529', 'step': 3496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.059958', 'step': 3496, 'epoch': 2} {'type': 'loss', 'content': 0.001944307005032897, 'timestamp': '2025-09-10 02:29:32.062180', 'step': 3497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.090925', 'step': 3497, 'epoch': 2} {'type': 'loss', 'content': 0.0046856580302119255, 'timestamp': '2025-09-10 02:29:32.092885', 'step': 3498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.121881', 'step': 3498, 'epoch': 2} {'type': 'loss', 'content': 0.04017262905836105, 'timestamp': '2025-09-10 02:29:32.123950', 'step': 3499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:32.152564', 'step': 3499, 'epoch': 2} {'type': 'loss', 'content': 0.0051589831709861755, 'timestamp': '2025-09-10 02:29:32.176088', 'step': 3500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-10 02:29:36.516402', 'step': 3500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.547565', 'step': 3500, 'epoch': 2} {'type': 'loss', 'content': 0.0038563567213714123, 'timestamp': '2025-09-10 02:29:36.549556', 'step': 3501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.578524', 'step': 3501, 'epoch': 2} {'type': 'loss', 'content': 0.004655842669308186, 'timestamp': '2025-09-10 02:29:36.580537', 'step': 3502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.608966', 'step': 3502, 'epoch': 2} {'type': 'loss', 'content': 0.0008553729276172817, 'timestamp': '2025-09-10 02:29:36.610908', 'step': 3503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.639406', 'step': 3503, 'epoch': 2} {'type': 'loss', 'content': 0.00034527681418694556, 'timestamp': '2025-09-10 02:29:36.663059', 'step': 3504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.692017', 'step': 3504, 'epoch': 2} {'type': 'loss', 'content': 0.00033239685581065714, 'timestamp': '2025-09-10 02:29:36.694041', 'step': 3505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.722970', 'step': 3505, 'epoch': 2} {'type': 'loss', 'content': 0.013092617504298687, 'timestamp': '2025-09-10 02:29:36.725097', 'step': 3506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:36.753859', 'step': 3506, 'epoch': 2} {'type': 'loss', 'content': 0.003901219693943858, 'timestamp': '2025-09-10 02:29:36.755976', 'step': 3507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.784651', 'step': 3507, 'epoch': 2} {'type': 'loss', 'content': 0.0010723823215812445, 'timestamp': '2025-09-10 02:29:36.808105', 'step': 3508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.838189', 'step': 3508, 'epoch': 2} {'type': 'loss', 'content': 0.0008635511621832848, 'timestamp': '2025-09-10 02:29:36.840762', 'step': 3509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.869860', 'step': 3509, 'epoch': 2} {'type': 'loss', 'content': 0.0037847573403269053, 'timestamp': '2025-09-10 02:29:36.872004', 'step': 3510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.900657', 'step': 3510, 'epoch': 2} {'type': 'loss', 'content': 0.0011993770021945238, 'timestamp': '2025-09-10 02:29:36.902912', 'step': 3511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.931867', 'step': 3511, 'epoch': 2} {'type': 'loss', 'content': 0.07670789211988449, 'timestamp': '2025-09-10 02:29:36.955387', 'step': 3512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:36.984640', 'step': 3512, 'epoch': 2} {'type': 'loss', 'content': 0.0012009853962808847, 'timestamp': '2025-09-10 02:29:36.986578', 'step': 3513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.015164', 'step': 3513, 'epoch': 2} {'type': 'loss', 'content': 0.004377874545753002, 'timestamp': '2025-09-10 02:29:37.017174', 'step': 3514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:37.045656', 'step': 3514, 'epoch': 2} {'type': 'loss', 'content': 0.001754789613187313, 'timestamp': '2025-09-10 02:29:37.047481', 'step': 3515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.076835', 'step': 3515, 'epoch': 2} {'type': 'loss', 'content': 0.007395769469439983, 'timestamp': '2025-09-10 02:29:37.100299', 'step': 3516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.129094', 'step': 3516, 'epoch': 2} {'type': 'loss', 'content': 0.000709313084371388, 'timestamp': '2025-09-10 02:29:37.131152', 'step': 3517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.159892', 'step': 3517, 'epoch': 2} {'type': 'loss', 'content': 0.0031018254812806845, 'timestamp': '2025-09-10 02:29:37.161784', 'step': 3518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.190375', 'step': 3518, 'epoch': 2} {'type': 'loss', 'content': 0.000283032248262316, 'timestamp': '2025-09-10 02:29:37.192222', 'step': 3519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.220567', 'step': 3519, 'epoch': 2} {'type': 'loss', 'content': 0.0018501004669815302, 'timestamp': '2025-09-10 02:29:37.244068', 'step': 3520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.273199', 'step': 3520, 'epoch': 2} {'type': 'loss', 'content': 0.02471715770661831, 'timestamp': '2025-09-10 02:29:37.275160', 'step': 3521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:37.303703', 'step': 3521, 'epoch': 2} {'type': 'loss', 'content': 0.0008763830992393196, 'timestamp': '2025-09-10 02:29:37.307100', 'step': 3522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.335803', 'step': 3522, 'epoch': 2} {'type': 'loss', 'content': 0.0005971781210973859, 'timestamp': '2025-09-10 02:29:37.337669', 'step': 3523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.366449', 'step': 3523, 'epoch': 2} {'type': 'loss', 'content': 0.024703845381736755, 'timestamp': '2025-09-10 02:29:37.390283', 'step': 3524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.420164', 'step': 3524, 'epoch': 2} {'type': 'loss', 'content': 0.0002873912744689733, 'timestamp': '2025-09-10 02:29:37.422345', 'step': 3525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.451208', 'step': 3525, 'epoch': 2} {'type': 'loss', 'content': 0.004364865832030773, 'timestamp': '2025-09-10 02:29:37.456125', 'step': 3526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:37.485447', 'step': 3526, 'epoch': 2} {'type': 'loss', 'content': 0.0008310721605084836, 'timestamp': '2025-09-10 02:29:37.487392', 'step': 3527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.516317', 'step': 3527, 'epoch': 2} {'type': 'loss', 'content': 0.0021310069132596254, 'timestamp': '2025-09-10 02:29:37.539554', 'step': 3528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.568742', 'step': 3528, 'epoch': 2} {'type': 'loss', 'content': 0.014144135639071465, 'timestamp': '2025-09-10 02:29:37.570781', 'step': 3529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.600811', 'step': 3529, 'epoch': 2} {'type': 'loss', 'content': 0.012850276194512844, 'timestamp': '2025-09-10 02:29:37.602796', 'step': 3530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.631702', 'step': 3530, 'epoch': 2} {'type': 'loss', 'content': 0.05908694490790367, 'timestamp': '2025-09-10 02:29:37.634020', 'step': 3531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.663433', 'step': 3531, 'epoch': 2} {'type': 'loss', 'content': 0.01407054252922535, 'timestamp': '2025-09-10 02:29:37.686971', 'step': 3532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.716974', 'step': 3532, 'epoch': 2} {'type': 'loss', 'content': 0.0031040941830724478, 'timestamp': '2025-09-10 02:29:37.718962', 'step': 3533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:37.748567', 'step': 3533, 'epoch': 2} {'type': 'loss', 'content': 0.003990839701145887, 'timestamp': '2025-09-10 02:29:37.752014', 'step': 3534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.784899', 'step': 3534, 'epoch': 2} {'type': 'loss', 'content': 0.022437114268541336, 'timestamp': '2025-09-10 02:29:37.791828', 'step': 3535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.822638', 'step': 3535, 'epoch': 2} {'type': 'loss', 'content': 0.001835890463553369, 'timestamp': '2025-09-10 02:29:37.846092', 'step': 3536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.875932', 'step': 3536, 'epoch': 2} {'type': 'loss', 'content': 0.04690726473927498, 'timestamp': '2025-09-10 02:29:37.882277', 'step': 3537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.911826', 'step': 3537, 'epoch': 2} {'type': 'loss', 'content': 0.0009008381748571992, 'timestamp': '2025-09-10 02:29:37.913927', 'step': 3538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:37.945715', 'step': 3538, 'epoch': 2} {'type': 'loss', 'content': 0.003957767505198717, 'timestamp': '2025-09-10 02:29:37.947690', 'step': 3539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:37.976246', 'step': 3539, 'epoch': 2} {'type': 'loss', 'content': 0.003778811078518629, 'timestamp': '2025-09-10 02:29:38.000291', 'step': 3540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:38.029685', 'step': 3540, 'epoch': 2} {'type': 'loss', 'content': 0.0017288854578509927, 'timestamp': '2025-09-10 02:29:38.031784', 'step': 3541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:38.063693', 'step': 3541, 'epoch': 2} {'type': 'loss', 'content': 0.0164285097271204, 'timestamp': '2025-09-10 02:29:38.065775', 'step': 3542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.095185', 'step': 3542, 'epoch': 2} {'type': 'loss', 'content': 0.0016188893932849169, 'timestamp': '2025-09-10 02:29:38.097295', 'step': 3543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:38.127092', 'step': 3543, 'epoch': 2} {'type': 'loss', 'content': 0.0013423208147287369, 'timestamp': '2025-09-10 02:29:38.154809', 'step': 3544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.183616', 'step': 3544, 'epoch': 2} {'type': 'loss', 'content': 0.006067909765988588, 'timestamp': '2025-09-10 02:29:38.185664', 'step': 3545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.219929', 'step': 3545, 'epoch': 2} {'type': 'loss', 'content': 0.040989816188812256, 'timestamp': '2025-09-10 02:29:38.222035', 'step': 3546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.251384', 'step': 3546, 'epoch': 2} {'type': 'loss', 'content': 0.0001859536860138178, 'timestamp': '2025-09-10 02:29:38.253232', 'step': 3547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.282909', 'step': 3547, 'epoch': 2} {'type': 'loss', 'content': 0.028182286769151688, 'timestamp': '2025-09-10 02:29:38.306464', 'step': 3548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.335805', 'step': 3548, 'epoch': 2} {'type': 'loss', 'content': 0.00029088411247357726, 'timestamp': '2025-09-10 02:29:38.337863', 'step': 3549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.372903', 'step': 3549, 'epoch': 2} {'type': 'loss', 'content': 0.0074108196422457695, 'timestamp': '2025-09-10 02:29:38.374929', 'step': 3550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.405216', 'step': 3550, 'epoch': 2} {'type': 'loss', 'content': 0.014489994384348392, 'timestamp': '2025-09-10 02:29:38.407397', 'step': 3551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.436446', 'step': 3551, 'epoch': 2} {'type': 'loss', 'content': 0.006930240895599127, 'timestamp': '2025-09-10 02:29:38.460075', 'step': 3552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.489110', 'step': 3552, 'epoch': 2} {'type': 'loss', 'content': 0.004822719842195511, 'timestamp': '2025-09-10 02:29:38.492904', 'step': 3553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.522591', 'step': 3553, 'epoch': 2} {'type': 'loss', 'content': 0.01741173304617405, 'timestamp': '2025-09-10 02:29:38.524799', 'step': 3554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:38.554263', 'step': 3554, 'epoch': 2} {'type': 'loss', 'content': 0.013965368270874023, 'timestamp': '2025-09-10 02:29:38.556134', 'step': 3555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.585763', 'step': 3555, 'epoch': 2} {'type': 'loss', 'content': 0.0038411770947277546, 'timestamp': '2025-09-10 02:29:38.609351', 'step': 3556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.639562', 'step': 3556, 'epoch': 2} {'type': 'loss', 'content': 0.00871545635163784, 'timestamp': '2025-09-10 02:29:38.641667', 'step': 3557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.673246', 'step': 3557, 'epoch': 2} {'type': 'loss', 'content': 0.0006059638108126819, 'timestamp': '2025-09-10 02:29:38.675202', 'step': 3558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.710990', 'step': 3558, 'epoch': 2} {'type': 'loss', 'content': 0.0036140538286417723, 'timestamp': '2025-09-10 02:29:38.712762', 'step': 3559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.741920', 'step': 3559, 'epoch': 2} {'type': 'loss', 'content': 0.006955344695597887, 'timestamp': '2025-09-10 02:29:38.765435', 'step': 3560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.795028', 'step': 3560, 'epoch': 2} {'type': 'loss', 'content': 0.007602483965456486, 'timestamp': '2025-09-10 02:29:38.796848', 'step': 3561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.826087', 'step': 3561, 'epoch': 2} {'type': 'loss', 'content': 0.014284110628068447, 'timestamp': '2025-09-10 02:29:38.827781', 'step': 3562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.856812', 'step': 3562, 'epoch': 2} {'type': 'loss', 'content': 0.0003184565284755081, 'timestamp': '2025-09-10 02:29:38.858825', 'step': 3563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.887686', 'step': 3563, 'epoch': 2} {'type': 'loss', 'content': 0.025357427075505257, 'timestamp': '2025-09-10 02:29:38.911382', 'step': 3564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:38.940598', 'step': 3564, 'epoch': 2} {'type': 'loss', 'content': 0.022137897089123726, 'timestamp': '2025-09-10 02:29:38.942670', 'step': 3565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:38.971849', 'step': 3565, 'epoch': 2} {'type': 'loss', 'content': 0.0037835354451090097, 'timestamp': '2025-09-10 02:29:38.973670', 'step': 3566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.003703', 'step': 3566, 'epoch': 2} {'type': 'loss', 'content': 0.027557658031582832, 'timestamp': '2025-09-10 02:29:39.005622', 'step': 3567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.034227', 'step': 3567, 'epoch': 2} {'type': 'loss', 'content': 0.0037048538215458393, 'timestamp': '2025-09-10 02:29:39.057920', 'step': 3568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.086916', 'step': 3568, 'epoch': 2} {'type': 'loss', 'content': 0.025103973224759102, 'timestamp': '2025-09-10 02:29:39.088898', 'step': 3569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.117819', 'step': 3569, 'epoch': 2} {'type': 'loss', 'content': 0.0010510541032999754, 'timestamp': '2025-09-10 02:29:39.119763', 'step': 3570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.148279', 'step': 3570, 'epoch': 2} {'type': 'loss', 'content': 0.006197761744260788, 'timestamp': '2025-09-10 02:29:39.150163', 'step': 3571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.180052', 'step': 3571, 'epoch': 2} {'type': 'loss', 'content': 0.005343415774405003, 'timestamp': '2025-09-10 02:29:39.203396', 'step': 3572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:39.235151', 'step': 3572, 'epoch': 2} {'type': 'loss', 'content': 0.0004703709564637393, 'timestamp': '2025-09-10 02:29:39.236903', 'step': 3573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.269100', 'step': 3573, 'epoch': 2} {'type': 'loss', 'content': 0.03291929513216019, 'timestamp': '2025-09-10 02:29:39.271075', 'step': 3574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.299961', 'step': 3574, 'epoch': 2} {'type': 'loss', 'content': 0.015463131479918957, 'timestamp': '2025-09-10 02:29:39.301829', 'step': 3575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.330732', 'step': 3575, 'epoch': 2} {'type': 'loss', 'content': 0.013998471200466156, 'timestamp': '2025-09-10 02:29:39.354382', 'step': 3576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.390111', 'step': 3576, 'epoch': 2} {'type': 'loss', 'content': 0.00035301089519634843, 'timestamp': '2025-09-10 02:29:39.391825', 'step': 3577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.421640', 'step': 3577, 'epoch': 2} {'type': 'loss', 'content': 0.0246095210313797, 'timestamp': '2025-09-10 02:29:39.423527', 'step': 3578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.452902', 'step': 3578, 'epoch': 2} {'type': 'loss', 'content': 0.0017538743559271097, 'timestamp': '2025-09-10 02:29:39.454921', 'step': 3579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.484002', 'step': 3579, 'epoch': 2} {'type': 'loss', 'content': 0.0009338590898551047, 'timestamp': '2025-09-10 02:29:39.507483', 'step': 3580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.537578', 'step': 3580, 'epoch': 2} {'type': 'loss', 'content': 0.00333792925812304, 'timestamp': '2025-09-10 02:29:39.539396', 'step': 3581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.568248', 'step': 3581, 'epoch': 2} {'type': 'loss', 'content': 0.019255487248301506, 'timestamp': '2025-09-10 02:29:39.570346', 'step': 3582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.599487', 'step': 3582, 'epoch': 2} {'type': 'loss', 'content': 0.031330667436122894, 'timestamp': '2025-09-10 02:29:39.601413', 'step': 3583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.630429', 'step': 3583, 'epoch': 2} {'type': 'loss', 'content': 0.00597680127248168, 'timestamp': '2025-09-10 02:29:39.653885', 'step': 3584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.682516', 'step': 3584, 'epoch': 2} {'type': 'loss', 'content': 0.0011178290005773306, 'timestamp': '2025-09-10 02:29:39.684541', 'step': 3585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.713080', 'step': 3585, 'epoch': 2} {'type': 'loss', 'content': 0.0012423843145370483, 'timestamp': '2025-09-10 02:29:39.715040', 'step': 3586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.743530', 'step': 3586, 'epoch': 2} {'type': 'loss', 'content': 0.0043282704427838326, 'timestamp': '2025-09-10 02:29:39.745362', 'step': 3587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.774411', 'step': 3587, 'epoch': 2} {'type': 'loss', 'content': 0.0014382840599864721, 'timestamp': '2025-09-10 02:29:39.797974', 'step': 3588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.826950', 'step': 3588, 'epoch': 2} {'type': 'loss', 'content': 0.022662704810500145, 'timestamp': '2025-09-10 02:29:39.828788', 'step': 3589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.857718', 'step': 3589, 'epoch': 2} {'type': 'loss', 'content': 0.0030301802325993776, 'timestamp': '2025-09-10 02:29:39.859406', 'step': 3590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.888396', 'step': 3590, 'epoch': 2} {'type': 'loss', 'content': 0.0027410441543906927, 'timestamp': '2025-09-10 02:29:39.890143', 'step': 3591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.919870', 'step': 3591, 'epoch': 2} {'type': 'loss', 'content': 0.0033905827440321445, 'timestamp': '2025-09-10 02:29:39.943159', 'step': 3592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:39.972171', 'step': 3592, 'epoch': 2} {'type': 'loss', 'content': 0.028375597670674324, 'timestamp': '2025-09-10 02:29:39.974105', 'step': 3593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.004486', 'step': 3593, 'epoch': 2} {'type': 'loss', 'content': 0.029348796233534813, 'timestamp': '2025-09-10 02:29:40.006320', 'step': 3594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.035364', 'step': 3594, 'epoch': 2} {'type': 'loss', 'content': 0.0023193187080323696, 'timestamp': '2025-09-10 02:29:40.037161', 'step': 3595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:40.065532', 'step': 3595, 'epoch': 2} {'type': 'loss', 'content': 0.003464184468612075, 'timestamp': '2025-09-10 02:29:40.089073', 'step': 3596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.118192', 'step': 3596, 'epoch': 2} {'type': 'loss', 'content': 0.06731926649808884, 'timestamp': '2025-09-10 02:29:40.120299', 'step': 3597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.149152', 'step': 3597, 'epoch': 2} {'type': 'loss', 'content': 0.0005612451932393014, 'timestamp': '2025-09-10 02:29:40.155596', 'step': 3598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.183926', 'step': 3598, 'epoch': 2} {'type': 'loss', 'content': 0.0018065435579046607, 'timestamp': '2025-09-10 02:29:40.185954', 'step': 3599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.214489', 'step': 3599, 'epoch': 2} {'type': 'loss', 'content': 0.001333405263721943, 'timestamp': '2025-09-10 02:29:40.237912', 'step': 3600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.266931', 'step': 3600, 'epoch': 2} {'type': 'loss', 'content': 0.0012239479692652822, 'timestamp': '2025-09-10 02:29:40.268947', 'step': 3601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.297507', 'step': 3601, 'epoch': 2} {'type': 'loss', 'content': 0.004321325104683638, 'timestamp': '2025-09-10 02:29:40.299658', 'step': 3602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:40.328340', 'step': 3602, 'epoch': 2} {'type': 'loss', 'content': 0.003785841166973114, 'timestamp': '2025-09-10 02:29:40.330270', 'step': 3603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.359365', 'step': 3603, 'epoch': 2} {'type': 'loss', 'content': 0.015127211809158325, 'timestamp': '2025-09-10 02:29:40.382795', 'step': 3604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:40.411306', 'step': 3604, 'epoch': 2} {'type': 'loss', 'content': 0.002256478648632765, 'timestamp': '2025-09-10 02:29:40.413114', 'step': 3605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.441854', 'step': 3605, 'epoch': 2} {'type': 'loss', 'content': 0.00017908155859913677, 'timestamp': '2025-09-10 02:29:40.443763', 'step': 3606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.473100', 'step': 3606, 'epoch': 2} {'type': 'loss', 'content': 0.0023490507155656815, 'timestamp': '2025-09-10 02:29:40.474963', 'step': 3607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.503398', 'step': 3607, 'epoch': 2} {'type': 'loss', 'content': 0.028720121830701828, 'timestamp': '2025-09-10 02:29:40.526927', 'step': 3608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.563503', 'step': 3608, 'epoch': 2} {'type': 'loss', 'content': 0.0009512993274256587, 'timestamp': '2025-09-10 02:29:40.565294', 'step': 3609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.594105', 'step': 3609, 'epoch': 2} {'type': 'loss', 'content': 0.001343045150861144, 'timestamp': '2025-09-10 02:29:40.595963', 'step': 3610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.631908', 'step': 3610, 'epoch': 2} {'type': 'loss', 'content': 0.00167557701934129, 'timestamp': '2025-09-10 02:29:40.634504', 'step': 3611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.663199', 'step': 3611, 'epoch': 2} {'type': 'loss', 'content': 0.0007090084836818278, 'timestamp': '2025-09-10 02:29:40.686606', 'step': 3612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.716893', 'step': 3612, 'epoch': 2} {'type': 'loss', 'content': 0.00794034544378519, 'timestamp': '2025-09-10 02:29:40.718910', 'step': 3613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:40.754810', 'step': 3613, 'epoch': 2} {'type': 'loss', 'content': 0.0007401722832582891, 'timestamp': '2025-09-10 02:29:40.757011', 'step': 3614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:40.792395', 'step': 3614, 'epoch': 2} {'type': 'loss', 'content': 0.035306137055158615, 'timestamp': '2025-09-10 02:29:40.794412', 'step': 3615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.825297', 'step': 3615, 'epoch': 2} {'type': 'loss', 'content': 0.02245972864329815, 'timestamp': '2025-09-10 02:29:40.849039', 'step': 3616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.881486', 'step': 3616, 'epoch': 2} {'type': 'loss', 'content': 0.0003232559538446367, 'timestamp': '2025-09-10 02:29:40.883871', 'step': 3617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.914878', 'step': 3617, 'epoch': 2} {'type': 'loss', 'content': 0.020015936344861984, 'timestamp': '2025-09-10 02:29:40.921512', 'step': 3618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.954943', 'step': 3618, 'epoch': 2} {'type': 'loss', 'content': 0.002817420056089759, 'timestamp': '2025-09-10 02:29:40.961041', 'step': 3619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:40.993002', 'step': 3619, 'epoch': 2} {'type': 'loss', 'content': 0.043967586010694504, 'timestamp': '2025-09-10 02:29:41.018849', 'step': 3620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.049809', 'step': 3620, 'epoch': 2} {'type': 'loss', 'content': 0.0570630244910717, 'timestamp': '2025-09-10 02:29:41.052786', 'step': 3621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.081978', 'step': 3621, 'epoch': 2} {'type': 'loss', 'content': 0.004470278043299913, 'timestamp': '2025-09-10 02:29:41.084857', 'step': 3622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.114270', 'step': 3622, 'epoch': 2} {'type': 'loss', 'content': 0.014231848530471325, 'timestamp': '2025-09-10 02:29:41.116302', 'step': 3623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.146252', 'step': 3623, 'epoch': 2} {'type': 'loss', 'content': 0.0008056826191022992, 'timestamp': '2025-09-10 02:29:41.169635', 'step': 3624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.201255', 'step': 3624, 'epoch': 2} {'type': 'loss', 'content': 0.00075248145731166, 'timestamp': '2025-09-10 02:29:41.203188', 'step': 3625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.232301', 'step': 3625, 'epoch': 2} {'type': 'loss', 'content': 0.0004431054985616356, 'timestamp': '2025-09-10 02:29:41.234341', 'step': 3626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.266107', 'step': 3626, 'epoch': 2} {'type': 'loss', 'content': 0.057048819959163666, 'timestamp': '2025-09-10 02:29:41.270080', 'step': 3627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.301025', 'step': 3627, 'epoch': 2} {'type': 'loss', 'content': 0.0006368341855704784, 'timestamp': '2025-09-10 02:29:41.326998', 'step': 3628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.361272', 'step': 3628, 'epoch': 2} {'type': 'loss', 'content': 0.01359105296432972, 'timestamp': '2025-09-10 02:29:41.367522', 'step': 3629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.397693', 'step': 3629, 'epoch': 2} {'type': 'loss', 'content': 0.009940946474671364, 'timestamp': '2025-09-10 02:29:41.399775', 'step': 3630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.430334', 'step': 3630, 'epoch': 2} {'type': 'loss', 'content': 0.01117002870887518, 'timestamp': '2025-09-10 02:29:41.432447', 'step': 3631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.465814', 'step': 3631, 'epoch': 2} {'type': 'loss', 'content': 0.04185846075415611, 'timestamp': '2025-09-10 02:29:41.489246', 'step': 3632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.519393', 'step': 3632, 'epoch': 2} {'type': 'loss', 'content': 0.007980273105204105, 'timestamp': '2025-09-10 02:29:41.521333', 'step': 3633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.550420', 'step': 3633, 'epoch': 2} {'type': 'loss', 'content': 0.0005918587557971478, 'timestamp': '2025-09-10 02:29:41.555172', 'step': 3634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.590700', 'step': 3634, 'epoch': 2} {'type': 'loss', 'content': 0.0007793071563355625, 'timestamp': '2025-09-10 02:29:41.592700', 'step': 3635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.621567', 'step': 3635, 'epoch': 2} {'type': 'loss', 'content': 0.004654579795897007, 'timestamp': '2025-09-10 02:29:41.644982', 'step': 3636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.674048', 'step': 3636, 'epoch': 2} {'type': 'loss', 'content': 0.018719719722867012, 'timestamp': '2025-09-10 02:29:41.675911', 'step': 3637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.706175', 'step': 3637, 'epoch': 2} {'type': 'loss', 'content': 0.007895631715655327, 'timestamp': '2025-09-10 02:29:41.708048', 'step': 3638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.736802', 'step': 3638, 'epoch': 2} {'type': 'loss', 'content': 0.0034556484315544367, 'timestamp': '2025-09-10 02:29:41.738838', 'step': 3639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.767304', 'step': 3639, 'epoch': 2} {'type': 'loss', 'content': 0.001310615218244493, 'timestamp': '2025-09-10 02:29:41.791303', 'step': 3640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.820525', 'step': 3640, 'epoch': 2} {'type': 'loss', 'content': 0.007575228810310364, 'timestamp': '2025-09-10 02:29:41.822780', 'step': 3641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.854705', 'step': 3641, 'epoch': 2} {'type': 'loss', 'content': 0.0037589308340102434, 'timestamp': '2025-09-10 02:29:41.858482', 'step': 3642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:41.889651', 'step': 3642, 'epoch': 2} {'type': 'loss', 'content': 0.0017815810861065984, 'timestamp': '2025-09-10 02:29:41.891498', 'step': 3643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.920561', 'step': 3643, 'epoch': 2} {'type': 'loss', 'content': 0.003863951889798045, 'timestamp': '2025-09-10 02:29:41.944394', 'step': 3644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:41.973264', 'step': 3644, 'epoch': 2} {'type': 'loss', 'content': 0.000795595406088978, 'timestamp': '2025-09-10 02:29:41.975090', 'step': 3645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.003616', 'step': 3645, 'epoch': 2} {'type': 'loss', 'content': 0.01848900318145752, 'timestamp': '2025-09-10 02:29:42.006599', 'step': 3646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.039592', 'step': 3646, 'epoch': 2} {'type': 'loss', 'content': 0.011643946170806885, 'timestamp': '2025-09-10 02:29:42.045962', 'step': 3647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:42.077836', 'step': 3647, 'epoch': 2} {'type': 'loss', 'content': 0.002694113878533244, 'timestamp': '2025-09-10 02:29:42.101136', 'step': 3648, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:29:44.126698', 'step': 3648, 'epoch': 2} {'type': 'pplx', 'content': 2284447.3901765565, 'timestamp': '2025-09-10 02:29:44.128854', 'step': 3648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.156395', 'step': 3648, 'epoch': 2} {'type': 'loss', 'content': 0.027025112882256508, 'timestamp': '2025-09-10 02:29:44.161102', 'step': 3649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.191047', 'step': 3649, 'epoch': 2} {'type': 'loss', 'content': 0.004025637172162533, 'timestamp': '2025-09-10 02:29:44.193140', 'step': 3650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.222591', 'step': 3650, 'epoch': 2} {'type': 'loss', 'content': 0.0014656836865469813, 'timestamp': '2025-09-10 02:29:44.224564', 'step': 3651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.253352', 'step': 3651, 'epoch': 2} {'type': 'loss', 'content': 0.003369029611349106, 'timestamp': '2025-09-10 02:29:44.277028', 'step': 3652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.306602', 'step': 3652, 'epoch': 2} {'type': 'loss', 'content': 0.002173537854105234, 'timestamp': '2025-09-10 02:29:44.308301', 'step': 3653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.337315', 'step': 3653, 'epoch': 2} {'type': 'loss', 'content': 0.033811409026384354, 'timestamp': '2025-09-10 02:29:44.339435', 'step': 3654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:44.368581', 'step': 3654, 'epoch': 2} {'type': 'loss', 'content': 0.006262099836021662, 'timestamp': '2025-09-10 02:29:44.370495', 'step': 3655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.399621', 'step': 3655, 'epoch': 2} {'type': 'loss', 'content': 0.007304125931113958, 'timestamp': '2025-09-10 02:29:44.422966', 'step': 3656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.452397', 'step': 3656, 'epoch': 2} {'type': 'loss', 'content': 0.004982686601579189, 'timestamp': '2025-09-10 02:29:44.454264', 'step': 3657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.483863', 'step': 3657, 'epoch': 2} {'type': 'loss', 'content': 0.0073280492797493935, 'timestamp': '2025-09-10 02:29:44.487318', 'step': 3658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.517012', 'step': 3658, 'epoch': 2} {'type': 'loss', 'content': 0.0029368854593485594, 'timestamp': '2025-09-10 02:29:44.519002', 'step': 3659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:44.549468', 'step': 3659, 'epoch': 2} {'type': 'loss', 'content': 0.0009214190649800003, 'timestamp': '2025-09-10 02:29:44.573088', 'step': 3660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.603013', 'step': 3660, 'epoch': 2} {'type': 'loss', 'content': 0.01310895849019289, 'timestamp': '2025-09-10 02:29:44.604984', 'step': 3661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.634038', 'step': 3661, 'epoch': 2} {'type': 'loss', 'content': 0.0038477382622659206, 'timestamp': '2025-09-10 02:29:44.635836', 'step': 3662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.664723', 'step': 3662, 'epoch': 2} {'type': 'loss', 'content': 0.017818301916122437, 'timestamp': '2025-09-10 02:29:44.667211', 'step': 3663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.696327', 'step': 3663, 'epoch': 2} {'type': 'loss', 'content': 0.0033985336776822805, 'timestamp': '2025-09-10 02:29:44.725305', 'step': 3664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.755101', 'step': 3664, 'epoch': 2} {'type': 'loss', 'content': 0.015548835508525372, 'timestamp': '2025-09-10 02:29:44.756957', 'step': 3665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.785922', 'step': 3665, 'epoch': 2} {'type': 'loss', 'content': 0.0015692234737798572, 'timestamp': '2025-09-10 02:29:44.788037', 'step': 3666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.817352', 'step': 3666, 'epoch': 2} {'type': 'loss', 'content': 0.026195894926786423, 'timestamp': '2025-09-10 02:29:44.819192', 'step': 3667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.848545', 'step': 3667, 'epoch': 2} {'type': 'loss', 'content': 0.015752393752336502, 'timestamp': '2025-09-10 02:29:44.871929', 'step': 3668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:44.901285', 'step': 3668, 'epoch': 2} {'type': 'loss', 'content': 0.03612801805138588, 'timestamp': '2025-09-10 02:29:44.903304', 'step': 3669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.932536', 'step': 3669, 'epoch': 2} {'type': 'loss', 'content': 0.00046271312749013305, 'timestamp': '2025-09-10 02:29:44.934368', 'step': 3670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.963490', 'step': 3670, 'epoch': 2} {'type': 'loss', 'content': 0.002088590059429407, 'timestamp': '2025-09-10 02:29:44.965588', 'step': 3671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:44.994597', 'step': 3671, 'epoch': 2} {'type': 'loss', 'content': 0.0005249869427643716, 'timestamp': '2025-09-10 02:29:45.017756', 'step': 3672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:45.046660', 'step': 3672, 'epoch': 2} {'type': 'loss', 'content': 0.03901727870106697, 'timestamp': '2025-09-10 02:29:45.048378', 'step': 3673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.077584', 'step': 3673, 'epoch': 2} {'type': 'loss', 'content': 0.008338806219398975, 'timestamp': '2025-09-10 02:29:45.079415', 'step': 3674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.108870', 'step': 3674, 'epoch': 2} {'type': 'loss', 'content': 0.0008816872723400593, 'timestamp': '2025-09-10 02:29:45.110647', 'step': 3675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.139433', 'step': 3675, 'epoch': 2} {'type': 'loss', 'content': 0.024514347314834595, 'timestamp': '2025-09-10 02:29:45.162601', 'step': 3676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.191212', 'step': 3676, 'epoch': 2} {'type': 'loss', 'content': 0.009860238060355186, 'timestamp': '2025-09-10 02:29:45.192807', 'step': 3677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.221868', 'step': 3677, 'epoch': 2} {'type': 'loss', 'content': 0.04991964250802994, 'timestamp': '2025-09-10 02:29:45.224017', 'step': 3678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.252973', 'step': 3678, 'epoch': 2} {'type': 'loss', 'content': 0.02714690938591957, 'timestamp': '2025-09-10 02:29:45.255099', 'step': 3679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.284237', 'step': 3679, 'epoch': 2} {'type': 'loss', 'content': 0.003578531090170145, 'timestamp': '2025-09-10 02:29:45.307659', 'step': 3680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.337122', 'step': 3680, 'epoch': 2} {'type': 'loss', 'content': 0.022830527275800705, 'timestamp': '2025-09-10 02:29:45.339126', 'step': 3681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.368573', 'step': 3681, 'epoch': 2} {'type': 'loss', 'content': 0.03104826994240284, 'timestamp': '2025-09-10 02:29:45.370416', 'step': 3682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:45.400070', 'step': 3682, 'epoch': 2} {'type': 'loss', 'content': 0.004579231142997742, 'timestamp': '2025-09-10 02:29:45.401966', 'step': 3683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:45.430778', 'step': 3683, 'epoch': 2} {'type': 'loss', 'content': 0.004558298271149397, 'timestamp': '2025-09-10 02:29:45.454081', 'step': 3684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.483034', 'step': 3684, 'epoch': 2} {'type': 'loss', 'content': 0.011908398941159248, 'timestamp': '2025-09-10 02:29:45.484786', 'step': 3685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.513470', 'step': 3685, 'epoch': 2} {'type': 'loss', 'content': 0.015865741297602654, 'timestamp': '2025-09-10 02:29:45.515192', 'step': 3686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.544192', 'step': 3686, 'epoch': 2} {'type': 'loss', 'content': 0.009198537096381187, 'timestamp': '2025-09-10 02:29:45.546231', 'step': 3687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.576016', 'step': 3687, 'epoch': 2} {'type': 'loss', 'content': 0.030312685295939445, 'timestamp': '2025-09-10 02:29:45.599341', 'step': 3688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.628710', 'step': 3688, 'epoch': 2} {'type': 'loss', 'content': 0.0030524202156811953, 'timestamp': '2025-09-10 02:29:45.630726', 'step': 3689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.659673', 'step': 3689, 'epoch': 2} {'type': 'loss', 'content': 0.009920709766447544, 'timestamp': '2025-09-10 02:29:45.661520', 'step': 3690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.690247', 'step': 3690, 'epoch': 2} {'type': 'loss', 'content': 0.0010171084431931376, 'timestamp': '2025-09-10 02:29:45.692245', 'step': 3691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.723403', 'step': 3691, 'epoch': 2} {'type': 'loss', 'content': 0.02838117443025112, 'timestamp': '2025-09-10 02:29:45.746811', 'step': 3692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.776143', 'step': 3692, 'epoch': 2} {'type': 'loss', 'content': 0.05404944345355034, 'timestamp': '2025-09-10 02:29:45.778399', 'step': 3693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.807484', 'step': 3693, 'epoch': 2} {'type': 'loss', 'content': 0.0054108272306621075, 'timestamp': '2025-09-10 02:29:45.809633', 'step': 3694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.838904', 'step': 3694, 'epoch': 2} {'type': 'loss', 'content': 0.005463256500661373, 'timestamp': '2025-09-10 02:29:45.840502', 'step': 3695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.869899', 'step': 3695, 'epoch': 2} {'type': 'loss', 'content': 0.001773970085196197, 'timestamp': '2025-09-10 02:29:45.893082', 'step': 3696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.922866', 'step': 3696, 'epoch': 2} {'type': 'loss', 'content': 0.006801222451031208, 'timestamp': '2025-09-10 02:29:45.924539', 'step': 3697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:45.953640', 'step': 3697, 'epoch': 2} {'type': 'loss', 'content': 0.0006268368451856077, 'timestamp': '2025-09-10 02:29:45.955530', 'step': 3698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:45.984873', 'step': 3698, 'epoch': 2} {'type': 'loss', 'content': 0.006464776117354631, 'timestamp': '2025-09-10 02:29:45.986761', 'step': 3699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.015926', 'step': 3699, 'epoch': 2} {'type': 'loss', 'content': 0.026705535128712654, 'timestamp': '2025-09-10 02:29:46.039313', 'step': 3700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.068635', 'step': 3700, 'epoch': 2} {'type': 'loss', 'content': 0.0059391348622739315, 'timestamp': '2025-09-10 02:29:46.070582', 'step': 3701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.099394', 'step': 3701, 'epoch': 2} {'type': 'loss', 'content': 0.06555979698896408, 'timestamp': '2025-09-10 02:29:46.101382', 'step': 3702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.130917', 'step': 3702, 'epoch': 2} {'type': 'loss', 'content': 0.00795739609748125, 'timestamp': '2025-09-10 02:29:46.132751', 'step': 3703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.161358', 'step': 3703, 'epoch': 2} {'type': 'loss', 'content': 0.00283786840736866, 'timestamp': '2025-09-10 02:29:46.184685', 'step': 3704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.213974', 'step': 3704, 'epoch': 2} {'type': 'loss', 'content': 0.0019304228480905294, 'timestamp': '2025-09-10 02:29:46.215845', 'step': 3705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.244444', 'step': 3705, 'epoch': 2} {'type': 'loss', 'content': 0.014217148534953594, 'timestamp': '2025-09-10 02:29:46.246225', 'step': 3706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.275060', 'step': 3706, 'epoch': 2} {'type': 'loss', 'content': 0.0016484770458191633, 'timestamp': '2025-09-10 02:29:46.276972', 'step': 3707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.305795', 'step': 3707, 'epoch': 2} {'type': 'loss', 'content': 0.00168976082932204, 'timestamp': '2025-09-10 02:29:46.329297', 'step': 3708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.358028', 'step': 3708, 'epoch': 2} {'type': 'loss', 'content': 0.00806464534252882, 'timestamp': '2025-09-10 02:29:46.359778', 'step': 3709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.388534', 'step': 3709, 'epoch': 2} {'type': 'loss', 'content': 0.0006172872963361442, 'timestamp': '2025-09-10 02:29:46.390313', 'step': 3710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.419396', 'step': 3710, 'epoch': 2} {'type': 'loss', 'content': 0.0014022333780303597, 'timestamp': '2025-09-10 02:29:46.421375', 'step': 3711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.450540', 'step': 3711, 'epoch': 2} {'type': 'loss', 'content': 0.000817774620372802, 'timestamp': '2025-09-10 02:29:46.473868', 'step': 3712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.502406', 'step': 3712, 'epoch': 2} {'type': 'loss', 'content': 0.0027491210494190454, 'timestamp': '2025-09-10 02:29:46.504433', 'step': 3713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.533182', 'step': 3713, 'epoch': 2} {'type': 'loss', 'content': 0.017350925132632256, 'timestamp': '2025-09-10 02:29:46.535052', 'step': 3714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.563994', 'step': 3714, 'epoch': 2} {'type': 'loss', 'content': 0.0004906453541480005, 'timestamp': '2025-09-10 02:29:46.565820', 'step': 3715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.594812', 'step': 3715, 'epoch': 2} {'type': 'loss', 'content': 0.00037716422230005264, 'timestamp': '2025-09-10 02:29:46.618317', 'step': 3716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:46.648114', 'step': 3716, 'epoch': 2} {'type': 'loss', 'content': 0.0031613376922905445, 'timestamp': '2025-09-10 02:29:46.649895', 'step': 3717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.679342', 'step': 3717, 'epoch': 2} {'type': 'loss', 'content': 0.0023560025729238987, 'timestamp': '2025-09-10 02:29:46.681151', 'step': 3718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:46.715004', 'step': 3718, 'epoch': 2} {'type': 'loss', 'content': 0.029003635048866272, 'timestamp': '2025-09-10 02:29:46.716845', 'step': 3719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:46.746208', 'step': 3719, 'epoch': 2} {'type': 'loss', 'content': 0.002591664670035243, 'timestamp': '2025-09-10 02:29:46.769401', 'step': 3720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.798228', 'step': 3720, 'epoch': 2} {'type': 'loss', 'content': 0.0003384334850125015, 'timestamp': '2025-09-10 02:29:46.800280', 'step': 3721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.830554', 'step': 3721, 'epoch': 2} {'type': 'loss', 'content': 0.0429510697722435, 'timestamp': '2025-09-10 02:29:46.832684', 'step': 3722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.861862', 'step': 3722, 'epoch': 2} {'type': 'loss', 'content': 0.006672355346381664, 'timestamp': '2025-09-10 02:29:46.865037', 'step': 3723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.894006', 'step': 3723, 'epoch': 2} {'type': 'loss', 'content': 0.0007637891103513539, 'timestamp': '2025-09-10 02:29:46.917379', 'step': 3724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:46.946830', 'step': 3724, 'epoch': 2} {'type': 'loss', 'content': 0.026889193803071976, 'timestamp': '2025-09-10 02:29:46.950223', 'step': 3725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:46.980827', 'step': 3725, 'epoch': 2} {'type': 'loss', 'content': 0.019164739176630974, 'timestamp': '2025-09-10 02:29:46.987669', 'step': 3726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.020094', 'step': 3726, 'epoch': 2} {'type': 'loss', 'content': 0.03033958561718464, 'timestamp': '2025-09-10 02:29:47.022049', 'step': 3727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.051301', 'step': 3727, 'epoch': 2} {'type': 'loss', 'content': 0.023309703916311264, 'timestamp': '2025-09-10 02:29:47.075376', 'step': 3728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.104579', 'step': 3728, 'epoch': 2} {'type': 'loss', 'content': 0.001016931259073317, 'timestamp': '2025-09-10 02:29:47.106244', 'step': 3729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.135573', 'step': 3729, 'epoch': 2} {'type': 'loss', 'content': 0.04949323460459709, 'timestamp': '2025-09-10 02:29:47.137516', 'step': 3730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.166935', 'step': 3730, 'epoch': 2} {'type': 'loss', 'content': 0.007128946948796511, 'timestamp': '2025-09-10 02:29:47.168732', 'step': 3731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.197652', 'step': 3731, 'epoch': 2} {'type': 'loss', 'content': 0.000753446773160249, 'timestamp': '2025-09-10 02:29:47.220806', 'step': 3732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.250155', 'step': 3732, 'epoch': 2} {'type': 'loss', 'content': 0.004715205170214176, 'timestamp': '2025-09-10 02:29:47.251971', 'step': 3733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.281394', 'step': 3733, 'epoch': 2} {'type': 'loss', 'content': 0.0013004514621570706, 'timestamp': '2025-09-10 02:29:47.283421', 'step': 3734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.312476', 'step': 3734, 'epoch': 2} {'type': 'loss', 'content': 0.001139021129347384, 'timestamp': '2025-09-10 02:29:47.314290', 'step': 3735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.343534', 'step': 3735, 'epoch': 2} {'type': 'loss', 'content': 0.00520856911316514, 'timestamp': '2025-09-10 02:29:47.367138', 'step': 3736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.396947', 'step': 3736, 'epoch': 2} {'type': 'loss', 'content': 0.002791530219838023, 'timestamp': '2025-09-10 02:29:47.398787', 'step': 3737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.427538', 'step': 3737, 'epoch': 2} {'type': 'loss', 'content': 0.0020063193514943123, 'timestamp': '2025-09-10 02:29:47.429388', 'step': 3738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.458038', 'step': 3738, 'epoch': 2} {'type': 'loss', 'content': 0.0008909571333788335, 'timestamp': '2025-09-10 02:29:47.459776', 'step': 3739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.488647', 'step': 3739, 'epoch': 2} {'type': 'loss', 'content': 0.0017103239661082625, 'timestamp': '2025-09-10 02:29:47.511715', 'step': 3740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.540914', 'step': 3740, 'epoch': 2} {'type': 'loss', 'content': 0.0011297070886939764, 'timestamp': '2025-09-10 02:29:47.542543', 'step': 3741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.571826', 'step': 3741, 'epoch': 2} {'type': 'loss', 'content': 0.0004920273786410689, 'timestamp': '2025-09-10 02:29:47.573348', 'step': 3742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.602000', 'step': 3742, 'epoch': 2} {'type': 'loss', 'content': 0.0008650109521113336, 'timestamp': '2025-09-10 02:29:47.603784', 'step': 3743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.632395', 'step': 3743, 'epoch': 2} {'type': 'loss', 'content': 0.0024125610943883657, 'timestamp': '2025-09-10 02:29:47.655904', 'step': 3744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.685282', 'step': 3744, 'epoch': 2} {'type': 'loss', 'content': 0.02579103223979473, 'timestamp': '2025-09-10 02:29:47.687104', 'step': 3745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.715859', 'step': 3745, 'epoch': 2} {'type': 'loss', 'content': 0.06585411727428436, 'timestamp': '2025-09-10 02:29:47.717426', 'step': 3746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.746867', 'step': 3746, 'epoch': 2} {'type': 'loss', 'content': 0.0029520683456212282, 'timestamp': '2025-09-10 02:29:47.748725', 'step': 3747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:47.777420', 'step': 3747, 'epoch': 2} {'type': 'loss', 'content': 0.001344243180938065, 'timestamp': '2025-09-10 02:29:47.800675', 'step': 3748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.829835', 'step': 3748, 'epoch': 2} {'type': 'loss', 'content': 0.000721413642168045, 'timestamp': '2025-09-10 02:29:47.831588', 'step': 3749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.860353', 'step': 3749, 'epoch': 2} {'type': 'loss', 'content': 0.0006202346412464976, 'timestamp': '2025-09-10 02:29:47.862153', 'step': 3750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.890971', 'step': 3750, 'epoch': 2} {'type': 'loss', 'content': 0.0008002677350305021, 'timestamp': '2025-09-10 02:29:47.892800', 'step': 3751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:47.921734', 'step': 3751, 'epoch': 2} {'type': 'loss', 'content': 0.005500978324562311, 'timestamp': '2025-09-10 02:29:47.944941', 'step': 3752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:47.974037', 'step': 3752, 'epoch': 2} {'type': 'loss', 'content': 0.00033267633989453316, 'timestamp': '2025-09-10 02:29:47.975658', 'step': 3753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.004315', 'step': 3753, 'epoch': 2} {'type': 'loss', 'content': 0.00047051571891643107, 'timestamp': '2025-09-10 02:29:48.006069', 'step': 3754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:48.034864', 'step': 3754, 'epoch': 2} {'type': 'loss', 'content': 0.0005520916311070323, 'timestamp': '2025-09-10 02:29:48.036639', 'step': 3755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.065164', 'step': 3755, 'epoch': 2} {'type': 'loss', 'content': 0.03412267565727234, 'timestamp': '2025-09-10 02:29:48.088585', 'step': 3756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.117569', 'step': 3756, 'epoch': 2} {'type': 'loss', 'content': 0.02543460950255394, 'timestamp': '2025-09-10 02:29:48.119497', 'step': 3757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.147836', 'step': 3757, 'epoch': 2} {'type': 'loss', 'content': 0.001249514170922339, 'timestamp': '2025-09-10 02:29:48.149774', 'step': 3758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.178619', 'step': 3758, 'epoch': 2} {'type': 'loss', 'content': 0.04360520467162132, 'timestamp': '2025-09-10 02:29:48.180380', 'step': 3759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.209597', 'step': 3759, 'epoch': 2} {'type': 'loss', 'content': 0.0005400353693403304, 'timestamp': '2025-09-10 02:29:48.232936', 'step': 3760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.261646', 'step': 3760, 'epoch': 2} {'type': 'loss', 'content': 0.0015497934073209763, 'timestamp': '2025-09-10 02:29:48.263209', 'step': 3761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.291925', 'step': 3761, 'epoch': 2} {'type': 'loss', 'content': 0.002198526868596673, 'timestamp': '2025-09-10 02:29:48.294963', 'step': 3762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.326470', 'step': 3762, 'epoch': 2} {'type': 'loss', 'content': 0.0051878029480576515, 'timestamp': '2025-09-10 02:29:48.328095', 'step': 3763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:48.357323', 'step': 3763, 'epoch': 2} {'type': 'loss', 'content': 0.0006835527601651847, 'timestamp': '2025-09-10 02:29:48.380899', 'step': 3764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.409703', 'step': 3764, 'epoch': 2} {'type': 'loss', 'content': 0.01905216835439205, 'timestamp': '2025-09-10 02:29:48.411954', 'step': 3765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.441324', 'step': 3765, 'epoch': 2} {'type': 'loss', 'content': 0.0037093120627105236, 'timestamp': '2025-09-10 02:29:48.443656', 'step': 3766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.473422', 'step': 3766, 'epoch': 2} {'type': 'loss', 'content': 0.036767762154340744, 'timestamp': '2025-09-10 02:29:48.475200', 'step': 3767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.504045', 'step': 3767, 'epoch': 2} {'type': 'loss', 'content': 0.014642768539488316, 'timestamp': '2025-09-10 02:29:48.527431', 'step': 3768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.556578', 'step': 3768, 'epoch': 2} {'type': 'loss', 'content': 0.0021930208895355463, 'timestamp': '2025-09-10 02:29:48.558464', 'step': 3769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:48.587071', 'step': 3769, 'epoch': 2} {'type': 'loss', 'content': 0.031093696132302284, 'timestamp': '2025-09-10 02:29:48.588905', 'step': 3770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.618606', 'step': 3770, 'epoch': 2} {'type': 'loss', 'content': 0.008391105569899082, 'timestamp': '2025-09-10 02:29:48.620555', 'step': 3771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.649543', 'step': 3771, 'epoch': 2} {'type': 'loss', 'content': 0.03555647283792496, 'timestamp': '2025-09-10 02:29:48.672859', 'step': 3772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.701939', 'step': 3772, 'epoch': 2} {'type': 'loss', 'content': 0.003431815654039383, 'timestamp': '2025-09-10 02:29:48.703736', 'step': 3773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:48.732755', 'step': 3773, 'epoch': 2} {'type': 'loss', 'content': 0.03433075174689293, 'timestamp': '2025-09-10 02:29:48.734653', 'step': 3774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.763553', 'step': 3774, 'epoch': 2} {'type': 'loss', 'content': 0.03858838230371475, 'timestamp': '2025-09-10 02:29:48.765501', 'step': 3775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:48.794600', 'step': 3775, 'epoch': 2} {'type': 'loss', 'content': 0.031171072274446487, 'timestamp': '2025-09-10 02:29:48.818146', 'step': 3776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.847106', 'step': 3776, 'epoch': 2} {'type': 'loss', 'content': 0.04389452189207077, 'timestamp': '2025-09-10 02:29:48.848909', 'step': 3777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.878098', 'step': 3777, 'epoch': 2} {'type': 'loss', 'content': 0.01032222993671894, 'timestamp': '2025-09-10 02:29:48.879947', 'step': 3778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.908971', 'step': 3778, 'epoch': 2} {'type': 'loss', 'content': 0.017558833584189415, 'timestamp': '2025-09-10 02:29:48.910581', 'step': 3779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:48.939384', 'step': 3779, 'epoch': 2} {'type': 'loss', 'content': 0.03611234202980995, 'timestamp': '2025-09-10 02:29:48.962984', 'step': 3780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:48.992051', 'step': 3780, 'epoch': 2} {'type': 'loss', 'content': 0.004638135898858309, 'timestamp': '2025-09-10 02:29:48.994019', 'step': 3781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.023396', 'step': 3781, 'epoch': 2} {'type': 'loss', 'content': 0.053978390991687775, 'timestamp': '2025-09-10 02:29:49.025111', 'step': 3782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.054357', 'step': 3782, 'epoch': 2} {'type': 'loss', 'content': 0.036939799785614014, 'timestamp': '2025-09-10 02:29:49.056198', 'step': 3783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.085199', 'step': 3783, 'epoch': 2} {'type': 'loss', 'content': 0.01262232568114996, 'timestamp': '2025-09-10 02:29:49.108523', 'step': 3784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.138461', 'step': 3784, 'epoch': 2} {'type': 'loss', 'content': 0.021416574716567993, 'timestamp': '2025-09-10 02:29:49.140405', 'step': 3785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:49.169704', 'step': 3785, 'epoch': 2} {'type': 'loss', 'content': 0.006002691574394703, 'timestamp': '2025-09-10 02:29:49.171570', 'step': 3786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.200609', 'step': 3786, 'epoch': 2} {'type': 'loss', 'content': 0.004044627770781517, 'timestamp': '2025-09-10 02:29:49.202384', 'step': 3787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.231207', 'step': 3787, 'epoch': 2} {'type': 'loss', 'content': 0.0032630611676722765, 'timestamp': '2025-09-10 02:29:49.254621', 'step': 3788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:49.284155', 'step': 3788, 'epoch': 2} {'type': 'loss', 'content': 0.007752821780741215, 'timestamp': '2025-09-10 02:29:49.285895', 'step': 3789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.316182', 'step': 3789, 'epoch': 2} {'type': 'loss', 'content': 0.028348324820399284, 'timestamp': '2025-09-10 02:29:49.317871', 'step': 3790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.347564', 'step': 3790, 'epoch': 2} {'type': 'loss', 'content': 0.007973059080541134, 'timestamp': '2025-09-10 02:29:49.349321', 'step': 3791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.379247', 'step': 3791, 'epoch': 2} {'type': 'loss', 'content': 0.027207011356949806, 'timestamp': '2025-09-10 02:29:49.402673', 'step': 3792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:49.432200', 'step': 3792, 'epoch': 2} {'type': 'loss', 'content': 0.02067798376083374, 'timestamp': '2025-09-10 02:29:49.434036', 'step': 3793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.464046', 'step': 3793, 'epoch': 2} {'type': 'loss', 'content': 0.006651004310697317, 'timestamp': '2025-09-10 02:29:49.466064', 'step': 3794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.495037', 'step': 3794, 'epoch': 2} {'type': 'loss', 'content': 0.009209489449858665, 'timestamp': '2025-09-10 02:29:49.496917', 'step': 3795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.526371', 'step': 3795, 'epoch': 2} {'type': 'loss', 'content': 0.001799335121177137, 'timestamp': '2025-09-10 02:29:49.549681', 'step': 3796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:49.579020', 'step': 3796, 'epoch': 2} {'type': 'loss', 'content': 0.02988806553184986, 'timestamp': '2025-09-10 02:29:49.580775', 'step': 3797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:29:49.609572', 'step': 3797, 'epoch': 2} {'type': 'loss', 'content': 0.008221580646932125, 'timestamp': '2025-09-10 02:29:49.611411', 'step': 3798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:49.640373', 'step': 3798, 'epoch': 2} {'type': 'loss', 'content': 0.003645301563665271, 'timestamp': '2025-09-10 02:29:49.642185', 'step': 3799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:49.670691', 'step': 3799, 'epoch': 2} {'type': 'loss', 'content': 0.0063503882847726345, 'timestamp': '2025-09-10 02:29:49.695410', 'step': 3800, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:29:51.634475', 'step': 3800, 'epoch': 2} {'type': 'pplx', 'content': 2281451.019019619, 'timestamp': '2025-09-10 02:29:51.639175', 'step': 3800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:51.669669', 'step': 3800, 'epoch': 2} {'type': 'loss', 'content': 0.020798875018954277, 'timestamp': '2025-09-10 02:29:51.671587', 'step': 3801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:51.700573', 'step': 3801, 'epoch': 2} {'type': 'loss', 'content': 0.017957177013158798, 'timestamp': '2025-09-10 02:29:51.702548', 'step': 3802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:51.732205', 'step': 3802, 'epoch': 2} {'type': 'loss', 'content': 0.020725306123495102, 'timestamp': '2025-09-10 02:29:51.736947', 'step': 3803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:51.765926', 'step': 3803, 'epoch': 2} {'type': 'loss', 'content': 0.02220289036631584, 'timestamp': '2025-09-10 02:29:51.789537', 'step': 3804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:51.821550', 'step': 3804, 'epoch': 2} {'type': 'loss', 'content': 0.04601726308465004, 'timestamp': '2025-09-10 02:29:51.823409', 'step': 3805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:51.860711', 'step': 3805, 'epoch': 2} {'type': 'loss', 'content': 0.01492367684841156, 'timestamp': '2025-09-10 02:29:51.862598', 'step': 3806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:51.893518', 'step': 3806, 'epoch': 2} {'type': 'loss', 'content': 0.005222121719270945, 'timestamp': '2025-09-10 02:29:51.895253', 'step': 3807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:51.924993', 'step': 3807, 'epoch': 2} {'type': 'loss', 'content': 0.025623932480812073, 'timestamp': '2025-09-10 02:29:51.948394', 'step': 3808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:51.978212', 'step': 3808, 'epoch': 2} {'type': 'loss', 'content': 0.012954866513609886, 'timestamp': '2025-09-10 02:29:51.980248', 'step': 3809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.010042', 'step': 3809, 'epoch': 2} {'type': 'loss', 'content': 0.0058203525841236115, 'timestamp': '2025-09-10 02:29:52.011664', 'step': 3810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.042102', 'step': 3810, 'epoch': 2} {'type': 'loss', 'content': 0.006981327198445797, 'timestamp': '2025-09-10 02:29:52.044042', 'step': 3811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.072747', 'step': 3811, 'epoch': 2} {'type': 'loss', 'content': 0.009414931759238243, 'timestamp': '2025-09-10 02:29:52.096091', 'step': 3812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.124917', 'step': 3812, 'epoch': 2} {'type': 'loss', 'content': 0.007098652422428131, 'timestamp': '2025-09-10 02:29:52.126778', 'step': 3813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.155518', 'step': 3813, 'epoch': 2} {'type': 'loss', 'content': 0.03589503839612007, 'timestamp': '2025-09-10 02:29:52.159707', 'step': 3814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.190405', 'step': 3814, 'epoch': 2} {'type': 'loss', 'content': 0.006379269063472748, 'timestamp': '2025-09-10 02:29:52.192913', 'step': 3815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:52.223713', 'step': 3815, 'epoch': 2} {'type': 'loss', 'content': 0.08531519025564194, 'timestamp': '2025-09-10 02:29:52.247080', 'step': 3816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.278995', 'step': 3816, 'epoch': 2} {'type': 'loss', 'content': 0.06141041964292526, 'timestamp': '2025-09-10 02:29:52.281020', 'step': 3817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.309986', 'step': 3817, 'epoch': 2} {'type': 'loss', 'content': 0.02239351160824299, 'timestamp': '2025-09-10 02:29:52.312237', 'step': 3818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.344613', 'step': 3818, 'epoch': 2} {'type': 'loss', 'content': 0.03820383921265602, 'timestamp': '2025-09-10 02:29:52.346603', 'step': 3819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.375886', 'step': 3819, 'epoch': 2} {'type': 'loss', 'content': 0.0030169629026204348, 'timestamp': '2025-09-10 02:29:52.399026', 'step': 3820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.427572', 'step': 3820, 'epoch': 2} {'type': 'loss', 'content': 0.007027463521808386, 'timestamp': '2025-09-10 02:29:52.429532', 'step': 3821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:52.458811', 'step': 3821, 'epoch': 2} {'type': 'loss', 'content': 0.009036152623593807, 'timestamp': '2025-09-10 02:29:52.464291', 'step': 3822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.495049', 'step': 3822, 'epoch': 2} {'type': 'loss', 'content': 0.004128528293222189, 'timestamp': '2025-09-10 02:29:52.498604', 'step': 3823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.529177', 'step': 3823, 'epoch': 2} {'type': 'loss', 'content': 0.021956181153655052, 'timestamp': '2025-09-10 02:29:52.552670', 'step': 3824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.581734', 'step': 3824, 'epoch': 2} {'type': 'loss', 'content': 0.002516851993277669, 'timestamp': '2025-09-10 02:29:52.583442', 'step': 3825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.612584', 'step': 3825, 'epoch': 2} {'type': 'loss', 'content': 0.006461195647716522, 'timestamp': '2025-09-10 02:29:52.614319', 'step': 3826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.643135', 'step': 3826, 'epoch': 2} {'type': 'loss', 'content': 0.003559216856956482, 'timestamp': '2025-09-10 02:29:52.646534', 'step': 3827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.675342', 'step': 3827, 'epoch': 2} {'type': 'loss', 'content': 0.014234405942261219, 'timestamp': '2025-09-10 02:29:52.698562', 'step': 3828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.727507', 'step': 3828, 'epoch': 2} {'type': 'loss', 'content': 0.06210624426603317, 'timestamp': '2025-09-10 02:29:52.729422', 'step': 3829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.758667', 'step': 3829, 'epoch': 2} {'type': 'loss', 'content': 0.001260876771993935, 'timestamp': '2025-09-10 02:29:52.760594', 'step': 3830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.790223', 'step': 3830, 'epoch': 2} {'type': 'loss', 'content': 0.0018323047552257776, 'timestamp': '2025-09-10 02:29:52.792172', 'step': 3831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:52.824074', 'step': 3831, 'epoch': 2} {'type': 'loss', 'content': 0.014975875616073608, 'timestamp': '2025-09-10 02:29:52.850385', 'step': 3832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.879599', 'step': 3832, 'epoch': 2} {'type': 'loss', 'content': 0.0037122422363609076, 'timestamp': '2025-09-10 02:29:52.881668', 'step': 3833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.910432', 'step': 3833, 'epoch': 2} {'type': 'loss', 'content': 0.004388042259961367, 'timestamp': '2025-09-10 02:29:52.912216', 'step': 3834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.941108', 'step': 3834, 'epoch': 2} {'type': 'loss', 'content': 0.008131971582770348, 'timestamp': '2025-09-10 02:29:52.942902', 'step': 3835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:52.972428', 'step': 3835, 'epoch': 2} {'type': 'loss', 'content': 0.0006102101178839803, 'timestamp': '2025-09-10 02:29:52.996118', 'step': 3836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.025459', 'step': 3836, 'epoch': 2} {'type': 'loss', 'content': 0.0015979270683601499, 'timestamp': '2025-09-10 02:29:53.027219', 'step': 3837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:53.056985', 'step': 3837, 'epoch': 2} {'type': 'loss', 'content': 0.008117801509797573, 'timestamp': '2025-09-10 02:29:53.058793', 'step': 3838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.088699', 'step': 3838, 'epoch': 2} {'type': 'loss', 'content': 0.004349916707724333, 'timestamp': '2025-09-10 02:29:53.090396', 'step': 3839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.119904', 'step': 3839, 'epoch': 2} {'type': 'loss', 'content': 0.010418047197163105, 'timestamp': '2025-09-10 02:29:53.143384', 'step': 3840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.173892', 'step': 3840, 'epoch': 2} {'type': 'loss', 'content': 0.012020133435726166, 'timestamp': '2025-09-10 02:29:53.176000', 'step': 3841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:53.205072', 'step': 3841, 'epoch': 2} {'type': 'loss', 'content': 0.002090350491926074, 'timestamp': '2025-09-10 02:29:53.206852', 'step': 3842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.236065', 'step': 3842, 'epoch': 2} {'type': 'loss', 'content': 0.003925625700503588, 'timestamp': '2025-09-10 02:29:53.237926', 'step': 3843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.267326', 'step': 3843, 'epoch': 2} {'type': 'loss', 'content': 0.016873370856046677, 'timestamp': '2025-09-10 02:29:53.290687', 'step': 3844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:53.320058', 'step': 3844, 'epoch': 2} {'type': 'loss', 'content': 0.021129081025719643, 'timestamp': '2025-09-10 02:29:53.321839', 'step': 3845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.350549', 'step': 3845, 'epoch': 2} {'type': 'loss', 'content': 0.0009218491613864899, 'timestamp': '2025-09-10 02:29:53.352638', 'step': 3846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.381637', 'step': 3846, 'epoch': 2} {'type': 'loss', 'content': 0.030607568100094795, 'timestamp': '2025-09-10 02:29:53.383472', 'step': 3847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.413302', 'step': 3847, 'epoch': 2} {'type': 'loss', 'content': 0.0012057321146130562, 'timestamp': '2025-09-10 02:29:53.436599', 'step': 3848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:53.465700', 'step': 3848, 'epoch': 2} {'type': 'loss', 'content': 0.004216120112687349, 'timestamp': '2025-09-10 02:29:53.468969', 'step': 3849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.498087', 'step': 3849, 'epoch': 2} {'type': 'loss', 'content': 0.008416402153670788, 'timestamp': '2025-09-10 02:29:53.501077', 'step': 3850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.531375', 'step': 3850, 'epoch': 2} {'type': 'loss', 'content': 0.012984177097678185, 'timestamp': '2025-09-10 02:29:53.533247', 'step': 3851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.568477', 'step': 3851, 'epoch': 2} {'type': 'loss', 'content': 0.0030218735337257385, 'timestamp': '2025-09-10 02:29:53.591761', 'step': 3852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.621706', 'step': 3852, 'epoch': 2} {'type': 'loss', 'content': 0.008891667239367962, 'timestamp': '2025-09-10 02:29:53.623349', 'step': 3853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.655170', 'step': 3853, 'epoch': 2} {'type': 'loss', 'content': 0.0016418088926002383, 'timestamp': '2025-09-10 02:29:53.657158', 'step': 3854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.694041', 'step': 3854, 'epoch': 2} {'type': 'loss', 'content': 0.0011525944573804736, 'timestamp': '2025-09-10 02:29:53.695959', 'step': 3855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.731313', 'step': 3855, 'epoch': 2} {'type': 'loss', 'content': 0.0021038041450083256, 'timestamp': '2025-09-10 02:29:53.754733', 'step': 3856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.784034', 'step': 3856, 'epoch': 2} {'type': 'loss', 'content': 0.0032909123692661524, 'timestamp': '2025-09-10 02:29:53.785799', 'step': 3857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.814691', 'step': 3857, 'epoch': 2} {'type': 'loss', 'content': 0.0014812155859544873, 'timestamp': '2025-09-10 02:29:53.816838', 'step': 3858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:53.845981', 'step': 3858, 'epoch': 2} {'type': 'loss', 'content': 0.045353662222623825, 'timestamp': '2025-09-10 02:29:53.847683', 'step': 3859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.876484', 'step': 3859, 'epoch': 2} {'type': 'loss', 'content': 0.0009044137550517917, 'timestamp': '2025-09-10 02:29:53.899887', 'step': 3860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.928640', 'step': 3860, 'epoch': 2} {'type': 'loss', 'content': 0.015513166785240173, 'timestamp': '2025-09-10 02:29:53.930751', 'step': 3861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.959644', 'step': 3861, 'epoch': 2} {'type': 'loss', 'content': 0.036323368549346924, 'timestamp': '2025-09-10 02:29:53.961539', 'step': 3862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:53.990198', 'step': 3862, 'epoch': 2} {'type': 'loss', 'content': 0.012267704121768475, 'timestamp': '2025-09-10 02:29:53.991939', 'step': 3863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.021313', 'step': 3863, 'epoch': 2} {'type': 'loss', 'content': 0.0019978752825409174, 'timestamp': '2025-09-10 02:29:54.044806', 'step': 3864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.073966', 'step': 3864, 'epoch': 2} {'type': 'loss', 'content': 0.004850670695304871, 'timestamp': '2025-09-10 02:29:54.075704', 'step': 3865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:54.105090', 'step': 3865, 'epoch': 2} {'type': 'loss', 'content': 0.01693730801343918, 'timestamp': '2025-09-10 02:29:54.107094', 'step': 3866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.137119', 'step': 3866, 'epoch': 2} {'type': 'loss', 'content': 0.0009086922509595752, 'timestamp': '2025-09-10 02:29:54.139024', 'step': 3867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.168113', 'step': 3867, 'epoch': 2} {'type': 'loss', 'content': 0.0009111051331274211, 'timestamp': '2025-09-10 02:29:54.191636', 'step': 3868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.223928', 'step': 3868, 'epoch': 2} {'type': 'loss', 'content': 0.015068517066538334, 'timestamp': '2025-09-10 02:29:54.225992', 'step': 3869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.257797', 'step': 3869, 'epoch': 2} {'type': 'loss', 'content': 0.005425662267953157, 'timestamp': '2025-09-10 02:29:54.259789', 'step': 3870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.291107', 'step': 3870, 'epoch': 2} {'type': 'loss', 'content': 0.002478309441357851, 'timestamp': '2025-09-10 02:29:54.293117', 'step': 3871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.324421', 'step': 3871, 'epoch': 2} {'type': 'loss', 'content': 0.025403037667274475, 'timestamp': '2025-09-10 02:29:54.354661', 'step': 3872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.384872', 'step': 3872, 'epoch': 2} {'type': 'loss', 'content': 0.0018272175220772624, 'timestamp': '2025-09-10 02:29:54.386929', 'step': 3873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.416489', 'step': 3873, 'epoch': 2} {'type': 'loss', 'content': 0.00047846572124399245, 'timestamp': '2025-09-10 02:29:54.420552', 'step': 3874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.450418', 'step': 3874, 'epoch': 2} {'type': 'loss', 'content': 0.0008692663977853954, 'timestamp': '2025-09-10 02:29:54.453460', 'step': 3875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.483631', 'step': 3875, 'epoch': 2} {'type': 'loss', 'content': 0.0019001452019438148, 'timestamp': '2025-09-10 02:29:54.507388', 'step': 3876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:54.540707', 'step': 3876, 'epoch': 2} {'type': 'loss', 'content': 0.0020460376981645823, 'timestamp': '2025-09-10 02:29:54.542603', 'step': 3877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.571885', 'step': 3877, 'epoch': 2} {'type': 'loss', 'content': 0.056729283183813095, 'timestamp': '2025-09-10 02:29:54.573888', 'step': 3878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.603442', 'step': 3878, 'epoch': 2} {'type': 'loss', 'content': 0.04931720346212387, 'timestamp': '2025-09-10 02:29:54.605175', 'step': 3879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.633983', 'step': 3879, 'epoch': 2} {'type': 'loss', 'content': 0.0004895208985544741, 'timestamp': '2025-09-10 02:29:54.657446', 'step': 3880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.686807', 'step': 3880, 'epoch': 2} {'type': 'loss', 'content': 0.04446374624967575, 'timestamp': '2025-09-10 02:29:54.688563', 'step': 3881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:54.716972', 'step': 3881, 'epoch': 2} {'type': 'loss', 'content': 0.0009974639397114515, 'timestamp': '2025-09-10 02:29:54.718955', 'step': 3882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:54.750289', 'step': 3882, 'epoch': 2} {'type': 'loss', 'content': 0.009860903024673462, 'timestamp': '2025-09-10 02:29:54.752519', 'step': 3883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.782085', 'step': 3883, 'epoch': 2} {'type': 'loss', 'content': 0.0006639487110078335, 'timestamp': '2025-09-10 02:29:54.805713', 'step': 3884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.835492', 'step': 3884, 'epoch': 2} {'type': 'loss', 'content': 0.056612931191921234, 'timestamp': '2025-09-10 02:29:54.837386', 'step': 3885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.866246', 'step': 3885, 'epoch': 2} {'type': 'loss', 'content': 0.007797067053616047, 'timestamp': '2025-09-10 02:29:54.868101', 'step': 3886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.910634', 'step': 3886, 'epoch': 2} {'type': 'loss', 'content': 0.007474190555512905, 'timestamp': '2025-09-10 02:29:54.913767', 'step': 3887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.943004', 'step': 3887, 'epoch': 2} {'type': 'loss', 'content': 0.0011330494889989495, 'timestamp': '2025-09-10 02:29:54.966553', 'step': 3888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:54.996465', 'step': 3888, 'epoch': 2} {'type': 'loss', 'content': 0.0007213219068944454, 'timestamp': '2025-09-10 02:29:54.998363', 'step': 3889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.040692', 'step': 3889, 'epoch': 2} {'type': 'loss', 'content': 0.006773100234568119, 'timestamp': '2025-09-10 02:29:55.042873', 'step': 3890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.072634', 'step': 3890, 'epoch': 2} {'type': 'loss', 'content': 0.0019626482389867306, 'timestamp': '2025-09-10 02:29:55.074678', 'step': 3891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:55.103498', 'step': 3891, 'epoch': 2} {'type': 'loss', 'content': 0.0072317165322601795, 'timestamp': '2025-09-10 02:29:55.126908', 'step': 3892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.156107', 'step': 3892, 'epoch': 2} {'type': 'loss', 'content': 0.02815018780529499, 'timestamp': '2025-09-10 02:29:55.158094', 'step': 3893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.187345', 'step': 3893, 'epoch': 2} {'type': 'loss', 'content': 0.008434193208813667, 'timestamp': '2025-09-10 02:29:55.189099', 'step': 3894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.217858', 'step': 3894, 'epoch': 2} {'type': 'loss', 'content': 0.0047177039086818695, 'timestamp': '2025-09-10 02:29:55.219902', 'step': 3895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.248588', 'step': 3895, 'epoch': 2} {'type': 'loss', 'content': 0.0007047757389955223, 'timestamp': '2025-09-10 02:29:55.271894', 'step': 3896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:55.301177', 'step': 3896, 'epoch': 2} {'type': 'loss', 'content': 0.00018565657956060022, 'timestamp': '2025-09-10 02:29:55.303142', 'step': 3897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.332479', 'step': 3897, 'epoch': 2} {'type': 'loss', 'content': 0.031223705038428307, 'timestamp': '2025-09-10 02:29:55.334249', 'step': 3898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.363809', 'step': 3898, 'epoch': 2} {'type': 'loss', 'content': 0.0027033479418605566, 'timestamp': '2025-09-10 02:29:55.365558', 'step': 3899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.394506', 'step': 3899, 'epoch': 2} {'type': 'loss', 'content': 0.009265147149562836, 'timestamp': '2025-09-10 02:29:55.417874', 'step': 3900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.446866', 'step': 3900, 'epoch': 2} {'type': 'loss', 'content': 0.00031578371999785304, 'timestamp': '2025-09-10 02:29:55.448756', 'step': 3901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:55.477613', 'step': 3901, 'epoch': 2} {'type': 'loss', 'content': 0.05228476598858833, 'timestamp': '2025-09-10 02:29:55.479579', 'step': 3902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.508499', 'step': 3902, 'epoch': 2} {'type': 'loss', 'content': 0.009154691360890865, 'timestamp': '2025-09-10 02:29:55.510203', 'step': 3903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.539528', 'step': 3903, 'epoch': 2} {'type': 'loss', 'content': 0.0027278217021375895, 'timestamp': '2025-09-10 02:29:55.563193', 'step': 3904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.593016', 'step': 3904, 'epoch': 2} {'type': 'loss', 'content': 0.0007913299486972392, 'timestamp': '2025-09-10 02:29:55.595202', 'step': 3905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.624244', 'step': 3905, 'epoch': 2} {'type': 'loss', 'content': 0.013621260412037373, 'timestamp': '2025-09-10 02:29:55.626138', 'step': 3906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.654998', 'step': 3906, 'epoch': 2} {'type': 'loss', 'content': 0.0011792125878855586, 'timestamp': '2025-09-10 02:29:55.656886', 'step': 3907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:55.686349', 'step': 3907, 'epoch': 2} {'type': 'loss', 'content': 0.09010230749845505, 'timestamp': '2025-09-10 02:29:55.709764', 'step': 3908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.740355', 'step': 3908, 'epoch': 2} {'type': 'loss', 'content': 0.03721442073583603, 'timestamp': '2025-09-10 02:29:55.742235', 'step': 3909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.771049', 'step': 3909, 'epoch': 2} {'type': 'loss', 'content': 0.008620529435575008, 'timestamp': '2025-09-10 02:29:55.772938', 'step': 3910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.801868', 'step': 3910, 'epoch': 2} {'type': 'loss', 'content': 0.0030499857384711504, 'timestamp': '2025-09-10 02:29:55.803449', 'step': 3911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:55.832522', 'step': 3911, 'epoch': 2} {'type': 'loss', 'content': 0.0010884355287998915, 'timestamp': '2025-09-10 02:29:55.855989', 'step': 3912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.885155', 'step': 3912, 'epoch': 2} {'type': 'loss', 'content': 0.009860835038125515, 'timestamp': '2025-09-10 02:29:55.887125', 'step': 3913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:55.916524', 'step': 3913, 'epoch': 2} {'type': 'loss', 'content': 0.01391797699034214, 'timestamp': '2025-09-10 02:29:55.918207', 'step': 3914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.947498', 'step': 3914, 'epoch': 2} {'type': 'loss', 'content': 0.0006923922337591648, 'timestamp': '2025-09-10 02:29:55.949523', 'step': 3915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:55.979565', 'step': 3915, 'epoch': 2} {'type': 'loss', 'content': 0.02779540978372097, 'timestamp': '2025-09-10 02:29:56.003168', 'step': 3916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.032851', 'step': 3916, 'epoch': 2} {'type': 'loss', 'content': 0.0006811950006522238, 'timestamp': '2025-09-10 02:29:56.034876', 'step': 3917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.063704', 'step': 3917, 'epoch': 2} {'type': 'loss', 'content': 0.009511835873126984, 'timestamp': '2025-09-10 02:29:56.065889', 'step': 3918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.095395', 'step': 3918, 'epoch': 2} {'type': 'loss', 'content': 0.0006021776353009045, 'timestamp': '2025-09-10 02:29:56.097857', 'step': 3919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:56.127173', 'step': 3919, 'epoch': 2} {'type': 'loss', 'content': 0.043237872421741486, 'timestamp': '2025-09-10 02:29:56.150893', 'step': 3920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.180573', 'step': 3920, 'epoch': 2} {'type': 'loss', 'content': 0.03712473064661026, 'timestamp': '2025-09-10 02:29:56.182550', 'step': 3921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.212141', 'step': 3921, 'epoch': 2} {'type': 'loss', 'content': 0.09350786358118057, 'timestamp': '2025-09-10 02:29:56.214113', 'step': 3922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.243208', 'step': 3922, 'epoch': 2} {'type': 'loss', 'content': 0.002584946108981967, 'timestamp': '2025-09-10 02:29:56.245031', 'step': 3923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:56.274117', 'step': 3923, 'epoch': 2} {'type': 'loss', 'content': 0.00048091966891661286, 'timestamp': '2025-09-10 02:29:56.297775', 'step': 3924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.326627', 'step': 3924, 'epoch': 2} {'type': 'loss', 'content': 0.02092219889163971, 'timestamp': '2025-09-10 02:29:56.328554', 'step': 3925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.357750', 'step': 3925, 'epoch': 2} {'type': 'loss', 'content': 0.016485700383782387, 'timestamp': '2025-09-10 02:29:56.359404', 'step': 3926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.388526', 'step': 3926, 'epoch': 2} {'type': 'loss', 'content': 0.03749697282910347, 'timestamp': '2025-09-10 02:29:56.390567', 'step': 3927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:56.419573', 'step': 3927, 'epoch': 2} {'type': 'loss', 'content': 0.0031544025987386703, 'timestamp': '2025-09-10 02:29:56.443031', 'step': 3928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.472735', 'step': 3928, 'epoch': 2} {'type': 'loss', 'content': 0.001070962636731565, 'timestamp': '2025-09-10 02:29:56.474549', 'step': 3929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.504046', 'step': 3929, 'epoch': 2} {'type': 'loss', 'content': 0.02318684197962284, 'timestamp': '2025-09-10 02:29:56.506161', 'step': 3930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.535569', 'step': 3930, 'epoch': 2} {'type': 'loss', 'content': 0.0021289566066116095, 'timestamp': '2025-09-10 02:29:56.537595', 'step': 3931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.566299', 'step': 3931, 'epoch': 2} {'type': 'loss', 'content': 0.001401600195094943, 'timestamp': '2025-09-10 02:29:56.589878', 'step': 3932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.618459', 'step': 3932, 'epoch': 2} {'type': 'loss', 'content': 0.015513087622821331, 'timestamp': '2025-09-10 02:29:56.620540', 'step': 3933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:56.649551', 'step': 3933, 'epoch': 2} {'type': 'loss', 'content': 0.003620770527049899, 'timestamp': '2025-09-10 02:29:56.651542', 'step': 3934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.680508', 'step': 3934, 'epoch': 2} {'type': 'loss', 'content': 0.009311458095908165, 'timestamp': '2025-09-10 02:29:56.682380', 'step': 3935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.711359', 'step': 3935, 'epoch': 2} {'type': 'loss', 'content': 0.0014422357780858874, 'timestamp': '2025-09-10 02:29:56.734709', 'step': 3936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.763344', 'step': 3936, 'epoch': 2} {'type': 'loss', 'content': 0.0011237767757847905, 'timestamp': '2025-09-10 02:29:56.765294', 'step': 3937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.794767', 'step': 3937, 'epoch': 2} {'type': 'loss', 'content': 0.01559279952198267, 'timestamp': '2025-09-10 02:29:56.796479', 'step': 3938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.825995', 'step': 3938, 'epoch': 2} {'type': 'loss', 'content': 0.00151608616579324, 'timestamp': '2025-09-10 02:29:56.827859', 'step': 3939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.856971', 'step': 3939, 'epoch': 2} {'type': 'loss', 'content': 0.01965700089931488, 'timestamp': '2025-09-10 02:29:56.880555', 'step': 3940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.911272', 'step': 3940, 'epoch': 2} {'type': 'loss', 'content': 0.008940416388213634, 'timestamp': '2025-09-10 02:29:56.913290', 'step': 3941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.943191', 'step': 3941, 'epoch': 2} {'type': 'loss', 'content': 0.0021927556954324245, 'timestamp': '2025-09-10 02:29:56.945014', 'step': 3942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:56.974085', 'step': 3942, 'epoch': 2} {'type': 'loss', 'content': 0.004246424417942762, 'timestamp': '2025-09-10 02:29:56.975850', 'step': 3943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.004702', 'step': 3943, 'epoch': 2} {'type': 'loss', 'content': 0.002561484929174185, 'timestamp': '2025-09-10 02:29:57.028352', 'step': 3944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.056951', 'step': 3944, 'epoch': 2} {'type': 'loss', 'content': 0.015105332247912884, 'timestamp': '2025-09-10 02:29:57.058809', 'step': 3945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.088690', 'step': 3945, 'epoch': 2} {'type': 'loss', 'content': 0.03435353934764862, 'timestamp': '2025-09-10 02:29:57.090499', 'step': 3946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:57.119919', 'step': 3946, 'epoch': 2} {'type': 'loss', 'content': 0.025656159967184067, 'timestamp': '2025-09-10 02:29:57.121962', 'step': 3947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.151240', 'step': 3947, 'epoch': 2} {'type': 'loss', 'content': 0.020980658009648323, 'timestamp': '2025-09-10 02:29:57.174771', 'step': 3948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.204333', 'step': 3948, 'epoch': 2} {'type': 'loss', 'content': 0.06358266621828079, 'timestamp': '2025-09-10 02:29:57.206328', 'step': 3949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.235447', 'step': 3949, 'epoch': 2} {'type': 'loss', 'content': 0.03147200122475624, 'timestamp': '2025-09-10 02:29:57.237403', 'step': 3950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.267036', 'step': 3950, 'epoch': 2} {'type': 'loss', 'content': 0.010876178741455078, 'timestamp': '2025-09-10 02:29:57.268894', 'step': 3951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:57.297293', 'step': 3951, 'epoch': 2} {'type': 'loss', 'content': 0.0009858089033514261, 'timestamp': '2025-09-10 02:29:57.320646', 'step': 3952, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:29:59.253867', 'step': 3952, 'epoch': 2} {'type': 'pplx', 'content': 2615495.2424767786, 'timestamp': '2025-09-10 02:29:59.255630', 'step': 3952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.284200', 'step': 3952, 'epoch': 2} {'type': 'loss', 'content': 0.0033627322409301996, 'timestamp': '2025-09-10 02:29:59.285977', 'step': 3953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.319115', 'step': 3953, 'epoch': 2} {'type': 'loss', 'content': 0.0013119837967678905, 'timestamp': '2025-09-10 02:29:59.320955', 'step': 3954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.349942', 'step': 3954, 'epoch': 2} {'type': 'loss', 'content': 0.014528962783515453, 'timestamp': '2025-09-10 02:29:59.351674', 'step': 3955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.380871', 'step': 3955, 'epoch': 2} {'type': 'loss', 'content': 0.017321297898888588, 'timestamp': '2025-09-10 02:29:59.404664', 'step': 3956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.433420', 'step': 3956, 'epoch': 2} {'type': 'loss', 'content': 0.01338347140699625, 'timestamp': '2025-09-10 02:29:59.435589', 'step': 3957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.465072', 'step': 3957, 'epoch': 2} {'type': 'loss', 'content': 0.003248179331421852, 'timestamp': '2025-09-10 02:29:59.466793', 'step': 3958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.495807', 'step': 3958, 'epoch': 2} {'type': 'loss', 'content': 0.002999834483489394, 'timestamp': '2025-09-10 02:29:59.497787', 'step': 3959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.526592', 'step': 3959, 'epoch': 2} {'type': 'loss', 'content': 0.012748440727591515, 'timestamp': '2025-09-10 02:29:59.549906', 'step': 3960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.579143', 'step': 3960, 'epoch': 2} {'type': 'loss', 'content': 0.014087346382439137, 'timestamp': '2025-09-10 02:29:59.580887', 'step': 3961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.610044', 'step': 3961, 'epoch': 2} {'type': 'loss', 'content': 0.052782002836465836, 'timestamp': '2025-09-10 02:29:59.612039', 'step': 3962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.640701', 'step': 3962, 'epoch': 2} {'type': 'loss', 'content': 0.04501364752650261, 'timestamp': '2025-09-10 02:29:59.642455', 'step': 3963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.670906', 'step': 3963, 'epoch': 2} {'type': 'loss', 'content': 0.028359297662973404, 'timestamp': '2025-09-10 02:29:59.694504', 'step': 3964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:29:59.727179', 'step': 3964, 'epoch': 2} {'type': 'loss', 'content': 0.0044990344904363155, 'timestamp': '2025-09-10 02:29:59.729155', 'step': 3965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.758657', 'step': 3965, 'epoch': 2} {'type': 'loss', 'content': 0.0017106970772147179, 'timestamp': '2025-09-10 02:29:59.760672', 'step': 3966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.789864', 'step': 3966, 'epoch': 2} {'type': 'loss', 'content': 0.026996374130249023, 'timestamp': '2025-09-10 02:29:59.792058', 'step': 3967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:29:59.820716', 'step': 3967, 'epoch': 2} {'type': 'loss', 'content': 0.01697719469666481, 'timestamp': '2025-09-10 02:29:59.844102', 'step': 3968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.873516', 'step': 3968, 'epoch': 2} {'type': 'loss', 'content': 0.0038953584153205156, 'timestamp': '2025-09-10 02:29:59.875597', 'step': 3969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.904591', 'step': 3969, 'epoch': 2} {'type': 'loss', 'content': 0.020404910668730736, 'timestamp': '2025-09-10 02:29:59.906425', 'step': 3970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.935596', 'step': 3970, 'epoch': 2} {'type': 'loss', 'content': 0.002622651169076562, 'timestamp': '2025-09-10 02:29:59.937494', 'step': 3971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:29:59.966490', 'step': 3971, 'epoch': 2} {'type': 'loss', 'content': 0.0038400557823479176, 'timestamp': '2025-09-10 02:29:59.990075', 'step': 3972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.019724', 'step': 3972, 'epoch': 2} {'type': 'loss', 'content': 0.005181272979825735, 'timestamp': '2025-09-10 02:30:00.021447', 'step': 3973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.051342', 'step': 3973, 'epoch': 2} {'type': 'loss', 'content': 0.008785213343799114, 'timestamp': '2025-09-10 02:30:00.053332', 'step': 3974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.082694', 'step': 3974, 'epoch': 2} {'type': 'loss', 'content': 0.00593586964532733, 'timestamp': '2025-09-10 02:30:00.084519', 'step': 3975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:00.114700', 'step': 3975, 'epoch': 2} {'type': 'loss', 'content': 0.014440938830375671, 'timestamp': '2025-09-10 02:30:00.138431', 'step': 3976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.167770', 'step': 3976, 'epoch': 2} {'type': 'loss', 'content': 0.02001953311264515, 'timestamp': '2025-09-10 02:30:00.169656', 'step': 3977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.200231', 'step': 3977, 'epoch': 2} {'type': 'loss', 'content': 0.002950613619759679, 'timestamp': '2025-09-10 02:30:00.202410', 'step': 3978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.231642', 'step': 3978, 'epoch': 2} {'type': 'loss', 'content': 0.0004510123108047992, 'timestamp': '2025-09-10 02:30:00.233670', 'step': 3979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.262401', 'step': 3979, 'epoch': 2} {'type': 'loss', 'content': 0.018364567309617996, 'timestamp': '2025-09-10 02:30:00.285923', 'step': 3980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.316267', 'step': 3980, 'epoch': 2} {'type': 'loss', 'content': 0.0015379964606836438, 'timestamp': '2025-09-10 02:30:00.318459', 'step': 3981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.348082', 'step': 3981, 'epoch': 2} {'type': 'loss', 'content': 0.006171368528157473, 'timestamp': '2025-09-10 02:30:00.350030', 'step': 3982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.378518', 'step': 3982, 'epoch': 2} {'type': 'loss', 'content': 0.0326349250972271, 'timestamp': '2025-09-10 02:30:00.380537', 'step': 3983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.409402', 'step': 3983, 'epoch': 2} {'type': 'loss', 'content': 0.001550512621179223, 'timestamp': '2025-09-10 02:30:00.432920', 'step': 3984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.466017', 'step': 3984, 'epoch': 2} {'type': 'loss', 'content': 0.014622099697589874, 'timestamp': '2025-09-10 02:30:00.468062', 'step': 3985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:00.496905', 'step': 3985, 'epoch': 2} {'type': 'loss', 'content': 0.011592509225010872, 'timestamp': '2025-09-10 02:30:00.499205', 'step': 3986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.528933', 'step': 3986, 'epoch': 2} {'type': 'loss', 'content': 0.0030130650848150253, 'timestamp': '2025-09-10 02:30:00.530600', 'step': 3987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.559724', 'step': 3987, 'epoch': 2} {'type': 'loss', 'content': 0.002759770257398486, 'timestamp': '2025-09-10 02:30:00.583184', 'step': 3988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.612700', 'step': 3988, 'epoch': 2} {'type': 'loss', 'content': 0.001730964402668178, 'timestamp': '2025-09-10 02:30:00.614556', 'step': 3989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.643867', 'step': 3989, 'epoch': 2} {'type': 'loss', 'content': 0.002076812321320176, 'timestamp': '2025-09-10 02:30:00.645839', 'step': 3990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.674781', 'step': 3990, 'epoch': 2} {'type': 'loss', 'content': 0.006203613709658384, 'timestamp': '2025-09-10 02:30:00.677156', 'step': 3991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.706529', 'step': 3991, 'epoch': 2} {'type': 'loss', 'content': 0.014986904338002205, 'timestamp': '2025-09-10 02:30:00.729828', 'step': 3992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:00.759586', 'step': 3992, 'epoch': 2} {'type': 'loss', 'content': 0.002898239064961672, 'timestamp': '2025-09-10 02:30:00.762937', 'step': 3993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.793584', 'step': 3993, 'epoch': 2} {'type': 'loss', 'content': 0.021653810515999794, 'timestamp': '2025-09-10 02:30:00.795930', 'step': 3994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.824585', 'step': 3994, 'epoch': 2} {'type': 'loss', 'content': 0.02008243091404438, 'timestamp': '2025-09-10 02:30:00.826616', 'step': 3995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.855718', 'step': 3995, 'epoch': 2} {'type': 'loss', 'content': 0.0016952345613390207, 'timestamp': '2025-09-10 02:30:00.879010', 'step': 3996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:00.908703', 'step': 3996, 'epoch': 2} {'type': 'loss', 'content': 0.0011763189686462283, 'timestamp': '2025-09-10 02:30:00.910599', 'step': 3997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.939259', 'step': 3997, 'epoch': 2} {'type': 'loss', 'content': 0.006132784299552441, 'timestamp': '2025-09-10 02:30:00.941170', 'step': 3998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:00.970037', 'step': 3998, 'epoch': 2} {'type': 'loss', 'content': 0.030746951699256897, 'timestamp': '2025-09-10 02:30:00.971771', 'step': 3999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:30:01.001173', 'step': 3999, 'epoch': 2} {'type': 'loss', 'content': 0.004159748088568449, 'timestamp': '2025-09-10 02:30:01.024858', 'step': 4000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-10 02:30:05.389163', 'step': 4000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:05.423101', 'step': 4000, 'epoch': 2} {'type': 'loss', 'content': 0.037195395678281784, 'timestamp': '2025-09-10 02:30:05.425038', 'step': 4001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.454720', 'step': 4001, 'epoch': 2} {'type': 'loss', 'content': 0.03162485733628273, 'timestamp': '2025-09-10 02:30:05.456588', 'step': 4002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.485405', 'step': 4002, 'epoch': 2} {'type': 'loss', 'content': 0.0013071894645690918, 'timestamp': '2025-09-10 02:30:05.487812', 'step': 4003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.517443', 'step': 4003, 'epoch': 2} {'type': 'loss', 'content': 0.0009963825577870011, 'timestamp': '2025-09-10 02:30:05.540927', 'step': 4004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.570267', 'step': 4004, 'epoch': 2} {'type': 'loss', 'content': 0.011100312694907188, 'timestamp': '2025-09-10 02:30:05.572225', 'step': 4005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.601282', 'step': 4005, 'epoch': 2} {'type': 'loss', 'content': 0.0045918854884803295, 'timestamp': '2025-09-10 02:30:05.603669', 'step': 4006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.633206', 'step': 4006, 'epoch': 2} {'type': 'loss', 'content': 0.019447358325123787, 'timestamp': '2025-09-10 02:30:05.635263', 'step': 4007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.664489', 'step': 4007, 'epoch': 2} {'type': 'loss', 'content': 0.0015611470444127917, 'timestamp': '2025-09-10 02:30:05.688488', 'step': 4008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.718131', 'step': 4008, 'epoch': 2} {'type': 'loss', 'content': 0.046501822769641876, 'timestamp': '2025-09-10 02:30:05.720993', 'step': 4009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.750228', 'step': 4009, 'epoch': 2} {'type': 'loss', 'content': 0.023632794618606567, 'timestamp': '2025-09-10 02:30:05.752253', 'step': 4010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.781786', 'step': 4010, 'epoch': 2} {'type': 'loss', 'content': 0.03684673458337784, 'timestamp': '2025-09-10 02:30:05.783548', 'step': 4011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.812482', 'step': 4011, 'epoch': 2} {'type': 'loss', 'content': 0.02587457373738289, 'timestamp': '2025-09-10 02:30:05.836129', 'step': 4012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.865936', 'step': 4012, 'epoch': 2} {'type': 'loss', 'content': 0.06219131126999855, 'timestamp': '2025-09-10 02:30:05.867998', 'step': 4013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.897526', 'step': 4013, 'epoch': 2} {'type': 'loss', 'content': 0.0007624986465089023, 'timestamp': '2025-09-10 02:30:05.899678', 'step': 4014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:05.928773', 'step': 4014, 'epoch': 2} {'type': 'loss', 'content': 0.021604230627417564, 'timestamp': '2025-09-10 02:30:05.931338', 'step': 4015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:05.961523', 'step': 4015, 'epoch': 2} {'type': 'loss', 'content': 0.034620076417922974, 'timestamp': '2025-09-10 02:30:05.985695', 'step': 4016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.015508', 'step': 4016, 'epoch': 2} {'type': 'loss', 'content': 0.004367115441709757, 'timestamp': '2025-09-10 02:30:06.017418', 'step': 4017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.046713', 'step': 4017, 'epoch': 2} {'type': 'loss', 'content': 0.05816395953297615, 'timestamp': '2025-09-10 02:30:06.048794', 'step': 4018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.077931', 'step': 4018, 'epoch': 2} {'type': 'loss', 'content': 0.0014941880945116282, 'timestamp': '2025-09-10 02:30:06.079899', 'step': 4019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.108947', 'step': 4019, 'epoch': 2} {'type': 'loss', 'content': 0.021339034661650658, 'timestamp': '2025-09-10 02:30:06.132542', 'step': 4020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.162180', 'step': 4020, 'epoch': 2} {'type': 'loss', 'content': 0.016611723229289055, 'timestamp': '2025-09-10 02:30:06.164273', 'step': 4021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.194192', 'step': 4021, 'epoch': 2} {'type': 'loss', 'content': 0.04329407960176468, 'timestamp': '2025-09-10 02:30:06.196334', 'step': 4022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.225919', 'step': 4022, 'epoch': 2} {'type': 'loss', 'content': 0.0354154147207737, 'timestamp': '2025-09-10 02:30:06.227832', 'step': 4023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.257145', 'step': 4023, 'epoch': 2} {'type': 'loss', 'content': 0.006620544008910656, 'timestamp': '2025-09-10 02:30:06.280719', 'step': 4024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.310229', 'step': 4024, 'epoch': 2} {'type': 'loss', 'content': 0.011618427000939846, 'timestamp': '2025-09-10 02:30:06.312331', 'step': 4025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.341609', 'step': 4025, 'epoch': 2} {'type': 'loss', 'content': 0.02478812262415886, 'timestamp': '2025-09-10 02:30:06.343714', 'step': 4026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.372696', 'step': 4026, 'epoch': 2} {'type': 'loss', 'content': 0.029102688655257225, 'timestamp': '2025-09-10 02:30:06.374686', 'step': 4027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.404049', 'step': 4027, 'epoch': 2} {'type': 'loss', 'content': 0.01803060993552208, 'timestamp': '2025-09-10 02:30:06.427656', 'step': 4028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.457049', 'step': 4028, 'epoch': 2} {'type': 'loss', 'content': 0.019281534478068352, 'timestamp': '2025-09-10 02:30:06.458956', 'step': 4029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.488395', 'step': 4029, 'epoch': 2} {'type': 'loss', 'content': 0.026226535439491272, 'timestamp': '2025-09-10 02:30:06.490984', 'step': 4030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:06.520182', 'step': 4030, 'epoch': 2} {'type': 'loss', 'content': 0.04052482917904854, 'timestamp': '2025-09-10 02:30:06.522024', 'step': 4031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.551280', 'step': 4031, 'epoch': 2} {'type': 'loss', 'content': 0.013854103162884712, 'timestamp': '2025-09-10 02:30:06.575011', 'step': 4032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.604546', 'step': 4032, 'epoch': 2} {'type': 'loss', 'content': 0.00856628268957138, 'timestamp': '2025-09-10 02:30:06.606638', 'step': 4033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.635760', 'step': 4033, 'epoch': 2} {'type': 'loss', 'content': 0.002442223485559225, 'timestamp': '2025-09-10 02:30:06.637559', 'step': 4034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.666604', 'step': 4034, 'epoch': 2} {'type': 'loss', 'content': 0.014602691866457462, 'timestamp': '2025-09-10 02:30:06.668550', 'step': 4035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.697829', 'step': 4035, 'epoch': 2} {'type': 'loss', 'content': 0.0062775383703410625, 'timestamp': '2025-09-10 02:30:06.721315', 'step': 4036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:06.751115', 'step': 4036, 'epoch': 2} {'type': 'loss', 'content': 0.005031880922615528, 'timestamp': '2025-09-10 02:30:06.752972', 'step': 4037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.781699', 'step': 4037, 'epoch': 2} {'type': 'loss', 'content': 0.025143790990114212, 'timestamp': '2025-09-10 02:30:06.783719', 'step': 4038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.812823', 'step': 4038, 'epoch': 2} {'type': 'loss', 'content': 0.0027612613048404455, 'timestamp': '2025-09-10 02:30:06.814844', 'step': 4039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.843592', 'step': 4039, 'epoch': 2} {'type': 'loss', 'content': 0.0623394213616848, 'timestamp': '2025-09-10 02:30:06.867227', 'step': 4040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.896099', 'step': 4040, 'epoch': 2} {'type': 'loss', 'content': 0.019657397642731667, 'timestamp': '2025-09-10 02:30:06.898140', 'step': 4041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.927452', 'step': 4041, 'epoch': 2} {'type': 'loss', 'content': 0.0016895385924726725, 'timestamp': '2025-09-10 02:30:06.929561', 'step': 4042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.958634', 'step': 4042, 'epoch': 2} {'type': 'loss', 'content': 0.008014382794499397, 'timestamp': '2025-09-10 02:30:06.960814', 'step': 4043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:06.990288', 'step': 4043, 'epoch': 2} {'type': 'loss', 'content': 0.001446551294066012, 'timestamp': '2025-09-10 02:30:07.014198', 'step': 4044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.044378', 'step': 4044, 'epoch': 2} {'type': 'loss', 'content': 0.002983215032145381, 'timestamp': '2025-09-10 02:30:07.047649', 'step': 4045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:07.077646', 'step': 4045, 'epoch': 2} {'type': 'loss', 'content': 0.005527123808860779, 'timestamp': '2025-09-10 02:30:07.079781', 'step': 4046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.109245', 'step': 4046, 'epoch': 2} {'type': 'loss', 'content': 0.027898045256733894, 'timestamp': '2025-09-10 02:30:07.111361', 'step': 4047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.141699', 'step': 4047, 'epoch': 2} {'type': 'loss', 'content': 0.04734370484948158, 'timestamp': '2025-09-10 02:30:07.165618', 'step': 4048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.195633', 'step': 4048, 'epoch': 2} {'type': 'loss', 'content': 0.001743005239404738, 'timestamp': '2025-09-10 02:30:07.197786', 'step': 4049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.227182', 'step': 4049, 'epoch': 2} {'type': 'loss', 'content': 0.028737738728523254, 'timestamp': '2025-09-10 02:30:07.228938', 'step': 4050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.258128', 'step': 4050, 'epoch': 2} {'type': 'loss', 'content': 0.00825404841452837, 'timestamp': '2025-09-10 02:30:07.259833', 'step': 4051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.289425', 'step': 4051, 'epoch': 2} {'type': 'loss', 'content': 0.018694309517741203, 'timestamp': '2025-09-10 02:30:07.312986', 'step': 4052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.342138', 'step': 4052, 'epoch': 2} {'type': 'loss', 'content': 0.016908397898077965, 'timestamp': '2025-09-10 02:30:07.344129', 'step': 4053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.372802', 'step': 4053, 'epoch': 2} {'type': 'loss', 'content': 0.04728404060006142, 'timestamp': '2025-09-10 02:30:07.374655', 'step': 4054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.403481', 'step': 4054, 'epoch': 2} {'type': 'loss', 'content': 0.006686265114694834, 'timestamp': '2025-09-10 02:30:07.405540', 'step': 4055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.434772', 'step': 4055, 'epoch': 2} {'type': 'loss', 'content': 0.0058333114720880985, 'timestamp': '2025-09-10 02:30:07.458095', 'step': 4056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.488058', 'step': 4056, 'epoch': 2} {'type': 'loss', 'content': 0.0021211898420006037, 'timestamp': '2025-09-10 02:30:07.490031', 'step': 4057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.519171', 'step': 4057, 'epoch': 2} {'type': 'loss', 'content': 0.008445287123322487, 'timestamp': '2025-09-10 02:30:07.521344', 'step': 4058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.551285', 'step': 4058, 'epoch': 2} {'type': 'loss', 'content': 0.020289387553930283, 'timestamp': '2025-09-10 02:30:07.553373', 'step': 4059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.583484', 'step': 4059, 'epoch': 2} {'type': 'loss', 'content': 0.03862438723444939, 'timestamp': '2025-09-10 02:30:07.607228', 'step': 4060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.637647', 'step': 4060, 'epoch': 2} {'type': 'loss', 'content': 0.002592772478237748, 'timestamp': '2025-09-10 02:30:07.639569', 'step': 4061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.669307', 'step': 4061, 'epoch': 2} {'type': 'loss', 'content': 0.011541393585503101, 'timestamp': '2025-09-10 02:30:07.671123', 'step': 4062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.700850', 'step': 4062, 'epoch': 2} {'type': 'loss', 'content': 0.03516516089439392, 'timestamp': '2025-09-10 02:30:07.702943', 'step': 4063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:07.733026', 'step': 4063, 'epoch': 2} {'type': 'loss', 'content': 0.035814959555864334, 'timestamp': '2025-09-10 02:30:07.756584', 'step': 4064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.786384', 'step': 4064, 'epoch': 2} {'type': 'loss', 'content': 0.0230566393584013, 'timestamp': '2025-09-10 02:30:07.788248', 'step': 4065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.817653', 'step': 4065, 'epoch': 2} {'type': 'loss', 'content': 0.0007291626534424722, 'timestamp': '2025-09-10 02:30:07.819866', 'step': 4066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.849222', 'step': 4066, 'epoch': 2} {'type': 'loss', 'content': 0.002776853274554014, 'timestamp': '2025-09-10 02:30:07.851347', 'step': 4067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.880501', 'step': 4067, 'epoch': 2} {'type': 'loss', 'content': 0.02804150991141796, 'timestamp': '2025-09-10 02:30:07.904201', 'step': 4068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.933547', 'step': 4068, 'epoch': 2} {'type': 'loss', 'content': 0.0031647146679461002, 'timestamp': '2025-09-10 02:30:07.935663', 'step': 4069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:07.964562', 'step': 4069, 'epoch': 2} {'type': 'loss', 'content': 0.00502938125282526, 'timestamp': '2025-09-10 02:30:07.966579', 'step': 4070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:07.996083', 'step': 4070, 'epoch': 2} {'type': 'loss', 'content': 0.00889466144144535, 'timestamp': '2025-09-10 02:30:07.997927', 'step': 4071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.026726', 'step': 4071, 'epoch': 2} {'type': 'loss', 'content': 0.006143052130937576, 'timestamp': '2025-09-10 02:30:08.050391', 'step': 4072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.079703', 'step': 4072, 'epoch': 2} {'type': 'loss', 'content': 0.01753229834139347, 'timestamp': '2025-09-10 02:30:08.081521', 'step': 4073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.111217', 'step': 4073, 'epoch': 2} {'type': 'loss', 'content': 0.01086281705647707, 'timestamp': '2025-09-10 02:30:08.114070', 'step': 4074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.143042', 'step': 4074, 'epoch': 2} {'type': 'loss', 'content': 0.006167584098875523, 'timestamp': '2025-09-10 02:30:08.145016', 'step': 4075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.174544', 'step': 4075, 'epoch': 2} {'type': 'loss', 'content': 0.00639066006988287, 'timestamp': '2025-09-10 02:30:08.197774', 'step': 4076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.227485', 'step': 4076, 'epoch': 2} {'type': 'loss', 'content': 0.007022134959697723, 'timestamp': '2025-09-10 02:30:08.231298', 'step': 4077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.263727', 'step': 4077, 'epoch': 2} {'type': 'loss', 'content': 0.01550102699548006, 'timestamp': '2025-09-10 02:30:08.266372', 'step': 4078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.304523', 'step': 4078, 'epoch': 2} {'type': 'loss', 'content': 0.02558515965938568, 'timestamp': '2025-09-10 02:30:08.312748', 'step': 4079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.347277', 'step': 4079, 'epoch': 2} {'type': 'loss', 'content': 0.018323184922337532, 'timestamp': '2025-09-10 02:30:08.378126', 'step': 4080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.420699', 'step': 4080, 'epoch': 2} {'type': 'loss', 'content': 0.005207470618188381, 'timestamp': '2025-09-10 02:30:08.428838', 'step': 4081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:08.464517', 'step': 4081, 'epoch': 2} {'type': 'loss', 'content': 0.003370558610185981, 'timestamp': '2025-09-10 02:30:08.470802', 'step': 4082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.508999', 'step': 4082, 'epoch': 2} {'type': 'loss', 'content': 0.015834081918001175, 'timestamp': '2025-09-10 02:30:08.512237', 'step': 4083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.548932', 'step': 4083, 'epoch': 2} {'type': 'loss', 'content': 0.010369435884058475, 'timestamp': '2025-09-10 02:30:08.581100', 'step': 4084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.617985', 'step': 4084, 'epoch': 2} {'type': 'loss', 'content': 0.004316999111324549, 'timestamp': '2025-09-10 02:30:08.623245', 'step': 4085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.665421', 'step': 4085, 'epoch': 2} {'type': 'loss', 'content': 0.015852492302656174, 'timestamp': '2025-09-10 02:30:08.669507', 'step': 4086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.703154', 'step': 4086, 'epoch': 2} {'type': 'loss', 'content': 0.02087516523897648, 'timestamp': '2025-09-10 02:30:08.711040', 'step': 4087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.750530', 'step': 4087, 'epoch': 2} {'type': 'loss', 'content': 0.0023121817503124475, 'timestamp': '2025-09-10 02:30:08.783557', 'step': 4088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.829775', 'step': 4088, 'epoch': 2} {'type': 'loss', 'content': 0.057893071323633194, 'timestamp': '2025-09-10 02:30:08.833852', 'step': 4089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:08.872849', 'step': 4089, 'epoch': 2} {'type': 'loss', 'content': 0.004587746690958738, 'timestamp': '2025-09-10 02:30:08.879871', 'step': 4090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.920435', 'step': 4090, 'epoch': 2} {'type': 'loss', 'content': 0.007176821120083332, 'timestamp': '2025-09-10 02:30:08.924410', 'step': 4091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:08.959251', 'step': 4091, 'epoch': 2} {'type': 'loss', 'content': 0.0320601612329483, 'timestamp': '2025-09-10 02:30:08.984886', 'step': 4092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.020574', 'step': 4092, 'epoch': 2} {'type': 'loss', 'content': 0.024275433272123337, 'timestamp': '2025-09-10 02:30:09.024886', 'step': 4093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.071153', 'step': 4093, 'epoch': 2} {'type': 'loss', 'content': 0.004208702128380537, 'timestamp': '2025-09-10 02:30:09.078754', 'step': 4094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:09.111703', 'step': 4094, 'epoch': 2} {'type': 'loss', 'content': 0.004779213573783636, 'timestamp': '2025-09-10 02:30:09.119917', 'step': 4095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.165195', 'step': 4095, 'epoch': 2} {'type': 'loss', 'content': 0.01595015451312065, 'timestamp': '2025-09-10 02:30:09.191945', 'step': 4096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:09.221332', 'step': 4096, 'epoch': 2} {'type': 'loss', 'content': 0.012580021284520626, 'timestamp': '2025-09-10 02:30:09.223564', 'step': 4097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.253415', 'step': 4097, 'epoch': 2} {'type': 'loss', 'content': 0.0033239300828427076, 'timestamp': '2025-09-10 02:30:09.255822', 'step': 4098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.284782', 'step': 4098, 'epoch': 2} {'type': 'loss', 'content': 0.010019478388130665, 'timestamp': '2025-09-10 02:30:09.286948', 'step': 4099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.316625', 'step': 4099, 'epoch': 2} {'type': 'loss', 'content': 0.010247709229588509, 'timestamp': '2025-09-10 02:30:09.340804', 'step': 4100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.373804', 'step': 4100, 'epoch': 2} {'type': 'loss', 'content': 0.0005138172418810427, 'timestamp': '2025-09-10 02:30:09.375810', 'step': 4101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:09.404939', 'step': 4101, 'epoch': 2} {'type': 'loss', 'content': 0.01599857024848461, 'timestamp': '2025-09-10 02:30:09.406896', 'step': 4102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:09.436254', 'step': 4102, 'epoch': 2} {'type': 'loss', 'content': 0.00803226139396429, 'timestamp': '2025-09-10 02:30:09.438182', 'step': 4103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:09.467500', 'step': 4103, 'epoch': 2} {'type': 'loss', 'content': 0.009947722777724266, 'timestamp': '2025-09-10 02:30:09.490951', 'step': 4104, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:30:11.387547', 'step': 4104, 'epoch': 2} {'type': 'pplx', 'content': 2449966.308638878, 'timestamp': '2025-09-10 02:30:11.389688', 'step': 4104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:11.417968', 'step': 4104, 'epoch': 2} {'type': 'loss', 'content': 0.00043871457455679774, 'timestamp': '2025-09-10 02:30:11.419785', 'step': 4105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.448864', 'step': 4105, 'epoch': 2} {'type': 'loss', 'content': 0.010353198274970055, 'timestamp': '2025-09-10 02:30:11.450934', 'step': 4106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.480163', 'step': 4106, 'epoch': 2} {'type': 'loss', 'content': 0.0013044985244050622, 'timestamp': '2025-09-10 02:30:11.481879', 'step': 4107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.510268', 'step': 4107, 'epoch': 2} {'type': 'loss', 'content': 0.021372154355049133, 'timestamp': '2025-09-10 02:30:11.534076', 'step': 4108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:11.563293', 'step': 4108, 'epoch': 2} {'type': 'loss', 'content': 0.024162491783499718, 'timestamp': '2025-09-10 02:30:11.565368', 'step': 4109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.594834', 'step': 4109, 'epoch': 2} {'type': 'loss', 'content': 0.008508929051458836, 'timestamp': '2025-09-10 02:30:11.597061', 'step': 4110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:11.626197', 'step': 4110, 'epoch': 2} {'type': 'loss', 'content': 0.0076085105538368225, 'timestamp': '2025-09-10 02:30:11.628426', 'step': 4111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.657190', 'step': 4111, 'epoch': 2} {'type': 'loss', 'content': 0.006037302315235138, 'timestamp': '2025-09-10 02:30:11.681072', 'step': 4112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.710872', 'step': 4112, 'epoch': 2} {'type': 'loss', 'content': 0.001271507004275918, 'timestamp': '2025-09-10 02:30:11.713047', 'step': 4113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.743044', 'step': 4113, 'epoch': 2} {'type': 'loss', 'content': 0.003994493279606104, 'timestamp': '2025-09-10 02:30:11.747138', 'step': 4114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.777037', 'step': 4114, 'epoch': 2} {'type': 'loss', 'content': 0.03419547528028488, 'timestamp': '2025-09-10 02:30:11.784158', 'step': 4115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:11.818507', 'step': 4115, 'epoch': 2} {'type': 'loss', 'content': 0.012027328833937645, 'timestamp': '2025-09-10 02:30:11.853991', 'step': 4116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.890264', 'step': 4116, 'epoch': 2} {'type': 'loss', 'content': 0.024934466928243637, 'timestamp': '2025-09-10 02:30:11.892812', 'step': 4117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:11.921809', 'step': 4117, 'epoch': 2} {'type': 'loss', 'content': 0.0010996349155902863, 'timestamp': '2025-09-10 02:30:11.923882', 'step': 4118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.952927', 'step': 4118, 'epoch': 2} {'type': 'loss', 'content': 0.0031524065416306257, 'timestamp': '2025-09-10 02:30:11.957780', 'step': 4119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:11.986421', 'step': 4119, 'epoch': 2} {'type': 'loss', 'content': 0.014573139138519764, 'timestamp': '2025-09-10 02:30:12.010069', 'step': 4120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.038988', 'step': 4120, 'epoch': 2} {'type': 'loss', 'content': 0.008100205101072788, 'timestamp': '2025-09-10 02:30:12.041085', 'step': 4121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.070339', 'step': 4121, 'epoch': 2} {'type': 'loss', 'content': 0.0010889542754739523, 'timestamp': '2025-09-10 02:30:12.072207', 'step': 4122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.104805', 'step': 4122, 'epoch': 2} {'type': 'loss', 'content': 0.001875544898211956, 'timestamp': '2025-09-10 02:30:12.107124', 'step': 4123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.137524', 'step': 4123, 'epoch': 2} {'type': 'loss', 'content': 0.0022023154888302088, 'timestamp': '2025-09-10 02:30:12.164193', 'step': 4124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:12.201423', 'step': 4124, 'epoch': 2} {'type': 'loss', 'content': 0.004335514735430479, 'timestamp': '2025-09-10 02:30:12.204110', 'step': 4125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:12.233130', 'step': 4125, 'epoch': 2} {'type': 'loss', 'content': 0.023035328835248947, 'timestamp': '2025-09-10 02:30:12.234933', 'step': 4126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.264042', 'step': 4126, 'epoch': 2} {'type': 'loss', 'content': 0.0016248939791694283, 'timestamp': '2025-09-10 02:30:12.265987', 'step': 4127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.294883', 'step': 4127, 'epoch': 2} {'type': 'loss', 'content': 0.06430184841156006, 'timestamp': '2025-09-10 02:30:12.319059', 'step': 4128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:12.359017', 'step': 4128, 'epoch': 2} {'type': 'loss', 'content': 0.002125711413100362, 'timestamp': '2025-09-10 02:30:12.361663', 'step': 4129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.395748', 'step': 4129, 'epoch': 2} {'type': 'loss', 'content': 0.0004497764748521149, 'timestamp': '2025-09-10 02:30:12.397657', 'step': 4130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.426527', 'step': 4130, 'epoch': 2} {'type': 'loss', 'content': 0.010637232102453709, 'timestamp': '2025-09-10 02:30:12.428505', 'step': 4131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:12.457293', 'step': 4131, 'epoch': 2} {'type': 'loss', 'content': 0.0022402710746973753, 'timestamp': '2025-09-10 02:30:12.480738', 'step': 4132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:12.510161', 'step': 4132, 'epoch': 2} {'type': 'loss', 'content': 0.0011779237538576126, 'timestamp': '2025-09-10 02:30:12.524879', 'step': 4133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.564302', 'step': 4133, 'epoch': 2} {'type': 'loss', 'content': 0.01739475503563881, 'timestamp': '2025-09-10 02:30:12.566080', 'step': 4134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.597284', 'step': 4134, 'epoch': 2} {'type': 'loss', 'content': 0.0007681874558329582, 'timestamp': '2025-09-10 02:30:12.599319', 'step': 4135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.629535', 'step': 4135, 'epoch': 2} {'type': 'loss', 'content': 0.01480245403945446, 'timestamp': '2025-09-10 02:30:12.653290', 'step': 4136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.682156', 'step': 4136, 'epoch': 2} {'type': 'loss', 'content': 0.0004298565909266472, 'timestamp': '2025-09-10 02:30:12.685402', 'step': 4137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.717830', 'step': 4137, 'epoch': 2} {'type': 'loss', 'content': 0.0003197753394488245, 'timestamp': '2025-09-10 02:30:12.719681', 'step': 4138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.748681', 'step': 4138, 'epoch': 2} {'type': 'loss', 'content': 0.0436149537563324, 'timestamp': '2025-09-10 02:30:12.759945', 'step': 4139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.796877', 'step': 4139, 'epoch': 2} {'type': 'loss', 'content': 0.001417894964106381, 'timestamp': '2025-09-10 02:30:12.820334', 'step': 4140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.850046', 'step': 4140, 'epoch': 2} {'type': 'loss', 'content': 0.00156626314856112, 'timestamp': '2025-09-10 02:30:12.852251', 'step': 4141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.882710', 'step': 4141, 'epoch': 2} {'type': 'loss', 'content': 0.009736338630318642, 'timestamp': '2025-09-10 02:30:12.884690', 'step': 4142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:12.918107', 'step': 4142, 'epoch': 2} {'type': 'loss', 'content': 0.007936223410069942, 'timestamp': '2025-09-10 02:30:12.919915', 'step': 4143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:12.950349', 'step': 4143, 'epoch': 2} {'type': 'loss', 'content': 0.00017661228775978088, 'timestamp': '2025-09-10 02:30:12.975296', 'step': 4144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.004870', 'step': 4144, 'epoch': 2} {'type': 'loss', 'content': 0.00022023404017090797, 'timestamp': '2025-09-10 02:30:13.006891', 'step': 4145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:13.035969', 'step': 4145, 'epoch': 2} {'type': 'loss', 'content': 0.00031622167443856597, 'timestamp': '2025-09-10 02:30:13.037852', 'step': 4146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:13.078489', 'step': 4146, 'epoch': 2} {'type': 'loss', 'content': 0.03914717584848404, 'timestamp': '2025-09-10 02:30:13.080395', 'step': 4147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:13.127488', 'step': 4147, 'epoch': 2} {'type': 'loss', 'content': 0.002464776625856757, 'timestamp': '2025-09-10 02:30:13.154753', 'step': 4148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:13.190110', 'step': 4148, 'epoch': 2} {'type': 'loss', 'content': 0.0040261102840304375, 'timestamp': '2025-09-10 02:30:13.192118', 'step': 4149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.222773', 'step': 4149, 'epoch': 2} {'type': 'loss', 'content': 0.014154909178614616, 'timestamp': '2025-09-10 02:30:13.224848', 'step': 4150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.254083', 'step': 4150, 'epoch': 2} {'type': 'loss', 'content': 0.0015958038857206702, 'timestamp': '2025-09-10 02:30:13.256099', 'step': 4151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.285702', 'step': 4151, 'epoch': 2} {'type': 'loss', 'content': 0.0005385707481764257, 'timestamp': '2025-09-10 02:30:13.309381', 'step': 4152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:13.341823', 'step': 4152, 'epoch': 2} {'type': 'loss', 'content': 0.0009124533389694989, 'timestamp': '2025-09-10 02:30:13.344028', 'step': 4153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.374630', 'step': 4153, 'epoch': 2} {'type': 'loss', 'content': 0.008126592263579369, 'timestamp': '2025-09-10 02:30:13.378552', 'step': 4154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.407949', 'step': 4154, 'epoch': 2} {'type': 'loss', 'content': 0.023636531084775925, 'timestamp': '2025-09-10 02:30:13.409958', 'step': 4155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.439545', 'step': 4155, 'epoch': 2} {'type': 'loss', 'content': 0.029642626643180847, 'timestamp': '2025-09-10 02:30:13.464968', 'step': 4156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.494112', 'step': 4156, 'epoch': 2} {'type': 'loss', 'content': 0.0045542968437075615, 'timestamp': '2025-09-10 02:30:13.495846', 'step': 4157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:13.524626', 'step': 4157, 'epoch': 2} {'type': 'loss', 'content': 0.019587215036153793, 'timestamp': '2025-09-10 02:30:13.526673', 'step': 4158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.561391', 'step': 4158, 'epoch': 2} {'type': 'loss', 'content': 0.015651974827051163, 'timestamp': '2025-09-10 02:30:13.565002', 'step': 4159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.598546', 'step': 4159, 'epoch': 2} {'type': 'loss', 'content': 0.038601044565439224, 'timestamp': '2025-09-10 02:30:13.622042', 'step': 4160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.653561', 'step': 4160, 'epoch': 2} {'type': 'loss', 'content': 0.001739536295644939, 'timestamp': '2025-09-10 02:30:13.655309', 'step': 4161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:13.684721', 'step': 4161, 'epoch': 2} {'type': 'loss', 'content': 0.006266446318477392, 'timestamp': '2025-09-10 02:30:13.686939', 'step': 4162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:13.723160', 'step': 4162, 'epoch': 2} {'type': 'loss', 'content': 0.015138087794184685, 'timestamp': '2025-09-10 02:30:13.725148', 'step': 4163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.754529', 'step': 4163, 'epoch': 2} {'type': 'loss', 'content': 0.003117672633379698, 'timestamp': '2025-09-10 02:30:13.779269', 'step': 4164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.808144', 'step': 4164, 'epoch': 2} {'type': 'loss', 'content': 0.007980624213814735, 'timestamp': '2025-09-10 02:30:13.810139', 'step': 4165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.840569', 'step': 4165, 'epoch': 2} {'type': 'loss', 'content': 0.0037680792156606913, 'timestamp': '2025-09-10 02:30:13.842676', 'step': 4166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:13.874020', 'step': 4166, 'epoch': 2} {'type': 'loss', 'content': 0.0011201956076547503, 'timestamp': '2025-09-10 02:30:13.875914', 'step': 4167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:13.904770', 'step': 4167, 'epoch': 2} {'type': 'loss', 'content': 0.015456787310540676, 'timestamp': '2025-09-10 02:30:13.928479', 'step': 4168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:13.957777', 'step': 4168, 'epoch': 2} {'type': 'loss', 'content': 0.0004954669857397676, 'timestamp': '2025-09-10 02:30:13.959967', 'step': 4169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:13.988843', 'step': 4169, 'epoch': 2} {'type': 'loss', 'content': 0.0020157850813120604, 'timestamp': '2025-09-10 02:30:13.990643', 'step': 4170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.020185', 'step': 4170, 'epoch': 2} {'type': 'loss', 'content': 0.020275894552469254, 'timestamp': '2025-09-10 02:30:14.022498', 'step': 4171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.051651', 'step': 4171, 'epoch': 2} {'type': 'loss', 'content': 0.005076683592051268, 'timestamp': '2025-09-10 02:30:14.074854', 'step': 4172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:14.104260', 'step': 4172, 'epoch': 2} {'type': 'loss', 'content': 0.013356396928429604, 'timestamp': '2025-09-10 02:30:14.106007', 'step': 4173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.134831', 'step': 4173, 'epoch': 2} {'type': 'loss', 'content': 0.023072008043527603, 'timestamp': '2025-09-10 02:30:14.136593', 'step': 4174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.165929', 'step': 4174, 'epoch': 2} {'type': 'loss', 'content': 0.0010532446904107928, 'timestamp': '2025-09-10 02:30:14.167837', 'step': 4175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.197056', 'step': 4175, 'epoch': 2} {'type': 'loss', 'content': 0.00019865957438014448, 'timestamp': '2025-09-10 02:30:14.220909', 'step': 4176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:14.250441', 'step': 4176, 'epoch': 2} {'type': 'loss', 'content': 0.011448433622717857, 'timestamp': '2025-09-10 02:30:14.252483', 'step': 4177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.281681', 'step': 4177, 'epoch': 2} {'type': 'loss', 'content': 0.001944276737049222, 'timestamp': '2025-09-10 02:30:14.283845', 'step': 4178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.312664', 'step': 4178, 'epoch': 2} {'type': 'loss', 'content': 0.00034151505678892136, 'timestamp': '2025-09-10 02:30:14.314810', 'step': 4179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.344090', 'step': 4179, 'epoch': 2} {'type': 'loss', 'content': 0.00792448129504919, 'timestamp': '2025-09-10 02:30:14.367461', 'step': 4180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.397006', 'step': 4180, 'epoch': 2} {'type': 'loss', 'content': 0.0011224746704101562, 'timestamp': '2025-09-10 02:30:14.399191', 'step': 4181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.428694', 'step': 4181, 'epoch': 2} {'type': 'loss', 'content': 0.0001815783034544438, 'timestamp': '2025-09-10 02:30:14.430991', 'step': 4182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.460005', 'step': 4182, 'epoch': 2} {'type': 'loss', 'content': 0.0010939531493932009, 'timestamp': '2025-09-10 02:30:14.462336', 'step': 4183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.492914', 'step': 4183, 'epoch': 2} {'type': 'loss', 'content': 0.010164668783545494, 'timestamp': '2025-09-10 02:30:14.516399', 'step': 4184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.546375', 'step': 4184, 'epoch': 2} {'type': 'loss', 'content': 0.0004541015368886292, 'timestamp': '2025-09-10 02:30:14.548297', 'step': 4185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.577534', 'step': 4185, 'epoch': 2} {'type': 'loss', 'content': 0.02757410891354084, 'timestamp': '2025-09-10 02:30:14.581439', 'step': 4186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.612662', 'step': 4186, 'epoch': 2} {'type': 'loss', 'content': 0.0029620155692100525, 'timestamp': '2025-09-10 02:30:14.614595', 'step': 4187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:14.643698', 'step': 4187, 'epoch': 2} {'type': 'loss', 'content': 0.03115958347916603, 'timestamp': '2025-09-10 02:30:14.667753', 'step': 4188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.696704', 'step': 4188, 'epoch': 2} {'type': 'loss', 'content': 0.02462959475815296, 'timestamp': '2025-09-10 02:30:14.699523', 'step': 4189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.728947', 'step': 4189, 'epoch': 2} {'type': 'loss', 'content': 0.008932234719395638, 'timestamp': '2025-09-10 02:30:14.730988', 'step': 4190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.760011', 'step': 4190, 'epoch': 2} {'type': 'loss', 'content': 0.02919713594019413, 'timestamp': '2025-09-10 02:30:14.762122', 'step': 4191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.791547', 'step': 4191, 'epoch': 2} {'type': 'loss', 'content': 0.0017417669296264648, 'timestamp': '2025-09-10 02:30:14.815099', 'step': 4192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.844924', 'step': 4192, 'epoch': 2} {'type': 'loss', 'content': 0.003337780013680458, 'timestamp': '2025-09-10 02:30:14.847005', 'step': 4193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.875835', 'step': 4193, 'epoch': 2} {'type': 'loss', 'content': 0.002467533340677619, 'timestamp': '2025-09-10 02:30:14.877948', 'step': 4194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:14.907004', 'step': 4194, 'epoch': 2} {'type': 'loss', 'content': 0.0005795446340925992, 'timestamp': '2025-09-10 02:30:14.909049', 'step': 4195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.938518', 'step': 4195, 'epoch': 2} {'type': 'loss', 'content': 0.0007786169881001115, 'timestamp': '2025-09-10 02:30:14.961881', 'step': 4196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:14.990997', 'step': 4196, 'epoch': 2} {'type': 'loss', 'content': 0.003195093246176839, 'timestamp': '2025-09-10 02:30:14.992995', 'step': 4197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.022041', 'step': 4197, 'epoch': 2} {'type': 'loss', 'content': 0.006263344548642635, 'timestamp': '2025-09-10 02:30:15.024331', 'step': 4198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.053425', 'step': 4198, 'epoch': 2} {'type': 'loss', 'content': 0.0008713462157174945, 'timestamp': '2025-09-10 02:30:15.055305', 'step': 4199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.084449', 'step': 4199, 'epoch': 2} {'type': 'loss', 'content': 0.017755350098013878, 'timestamp': '2025-09-10 02:30:15.108050', 'step': 4200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.137303', 'step': 4200, 'epoch': 2} {'type': 'loss', 'content': 0.005264886189252138, 'timestamp': '2025-09-10 02:30:15.139363', 'step': 4201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:15.169105', 'step': 4201, 'epoch': 2} {'type': 'loss', 'content': 0.00546374311670661, 'timestamp': '2025-09-10 02:30:15.171006', 'step': 4202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.199895', 'step': 4202, 'epoch': 2} {'type': 'loss', 'content': 0.0007607027655467391, 'timestamp': '2025-09-10 02:30:15.202063', 'step': 4203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.231114', 'step': 4203, 'epoch': 2} {'type': 'loss', 'content': 0.005106969736516476, 'timestamp': '2025-09-10 02:30:15.254614', 'step': 4204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.284142', 'step': 4204, 'epoch': 2} {'type': 'loss', 'content': 0.004172091837972403, 'timestamp': '2025-09-10 02:30:15.285955', 'step': 4205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.315039', 'step': 4205, 'epoch': 2} {'type': 'loss', 'content': 0.00981372781097889, 'timestamp': '2025-09-10 02:30:15.317125', 'step': 4206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.346326', 'step': 4206, 'epoch': 2} {'type': 'loss', 'content': 0.004303614143282175, 'timestamp': '2025-09-10 02:30:15.348388', 'step': 4207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.377635', 'step': 4207, 'epoch': 2} {'type': 'loss', 'content': 0.00102977582719177, 'timestamp': '2025-09-10 02:30:15.400934', 'step': 4208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.430102', 'step': 4208, 'epoch': 2} {'type': 'loss', 'content': 0.020137446001172066, 'timestamp': '2025-09-10 02:30:15.432394', 'step': 4209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.461473', 'step': 4209, 'epoch': 2} {'type': 'loss', 'content': 0.010775747708976269, 'timestamp': '2025-09-10 02:30:15.463351', 'step': 4210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.491935', 'step': 4210, 'epoch': 2} {'type': 'loss', 'content': 0.0025170750450342894, 'timestamp': '2025-09-10 02:30:15.493988', 'step': 4211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:15.522818', 'step': 4211, 'epoch': 2} {'type': 'loss', 'content': 0.005516665522009134, 'timestamp': '2025-09-10 02:30:15.546569', 'step': 4212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.576204', 'step': 4212, 'epoch': 2} {'type': 'loss', 'content': 0.000951143098063767, 'timestamp': '2025-09-10 02:30:15.578240', 'step': 4213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.607924', 'step': 4213, 'epoch': 2} {'type': 'loss', 'content': 0.0008516961825080216, 'timestamp': '2025-09-10 02:30:15.609790', 'step': 4214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.638609', 'step': 4214, 'epoch': 2} {'type': 'loss', 'content': 0.04331762716174126, 'timestamp': '2025-09-10 02:30:15.640841', 'step': 4215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.670108', 'step': 4215, 'epoch': 2} {'type': 'loss', 'content': 0.06728193908929825, 'timestamp': '2025-09-10 02:30:15.693741', 'step': 4216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.723549', 'step': 4216, 'epoch': 2} {'type': 'loss', 'content': 0.0026774972211569548, 'timestamp': '2025-09-10 02:30:15.725659', 'step': 4217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:15.755397', 'step': 4217, 'epoch': 2} {'type': 'loss', 'content': 0.003880779491737485, 'timestamp': '2025-09-10 02:30:15.757539', 'step': 4218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.786812', 'step': 4218, 'epoch': 2} {'type': 'loss', 'content': 0.0013745512114837766, 'timestamp': '2025-09-10 02:30:15.788962', 'step': 4219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.818124', 'step': 4219, 'epoch': 2} {'type': 'loss', 'content': 0.0004106538253836334, 'timestamp': '2025-09-10 02:30:15.841688', 'step': 4220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:15.871171', 'step': 4220, 'epoch': 2} {'type': 'loss', 'content': 0.00025766046019271016, 'timestamp': '2025-09-10 02:30:15.873415', 'step': 4221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:15.902496', 'step': 4221, 'epoch': 2} {'type': 'loss', 'content': 0.007885267026722431, 'timestamp': '2025-09-10 02:30:15.904517', 'step': 4222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.933918', 'step': 4222, 'epoch': 2} {'type': 'loss', 'content': 0.00033780946978367865, 'timestamp': '2025-09-10 02:30:15.936034', 'step': 4223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:15.964848', 'step': 4223, 'epoch': 2} {'type': 'loss', 'content': 0.012453519739210606, 'timestamp': '2025-09-10 02:30:15.988501', 'step': 4224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.018114', 'step': 4224, 'epoch': 2} {'type': 'loss', 'content': 0.0007972000166773796, 'timestamp': '2025-09-10 02:30:16.020157', 'step': 4225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.049596', 'step': 4225, 'epoch': 2} {'type': 'loss', 'content': 0.0008176401606760919, 'timestamp': '2025-09-10 02:30:16.051534', 'step': 4226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.080591', 'step': 4226, 'epoch': 2} {'type': 'loss', 'content': 0.0032796438317745924, 'timestamp': '2025-09-10 02:30:16.082988', 'step': 4227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.112213', 'step': 4227, 'epoch': 2} {'type': 'loss', 'content': 0.002389345783740282, 'timestamp': '2025-09-10 02:30:16.135785', 'step': 4228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.165456', 'step': 4228, 'epoch': 2} {'type': 'loss', 'content': 0.00020894188492093235, 'timestamp': '2025-09-10 02:30:16.167503', 'step': 4229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.196451', 'step': 4229, 'epoch': 2} {'type': 'loss', 'content': 0.009093990549445152, 'timestamp': '2025-09-10 02:30:16.198623', 'step': 4230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.228120', 'step': 4230, 'epoch': 2} {'type': 'loss', 'content': 0.0009998397435992956, 'timestamp': '2025-09-10 02:30:16.230550', 'step': 4231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.259696', 'step': 4231, 'epoch': 2} {'type': 'loss', 'content': 0.0010820318711921573, 'timestamp': '2025-09-10 02:30:16.283185', 'step': 4232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.312859', 'step': 4232, 'epoch': 2} {'type': 'loss', 'content': 0.0028439860325306654, 'timestamp': '2025-09-10 02:30:16.315029', 'step': 4233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.344140', 'step': 4233, 'epoch': 2} {'type': 'loss', 'content': 0.0011132077779620886, 'timestamp': '2025-09-10 02:30:16.346217', 'step': 4234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:16.376167', 'step': 4234, 'epoch': 2} {'type': 'loss', 'content': 0.007233327720314264, 'timestamp': '2025-09-10 02:30:16.378345', 'step': 4235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.407160', 'step': 4235, 'epoch': 2} {'type': 'loss', 'content': 0.026646794751286507, 'timestamp': '2025-09-10 02:30:16.431310', 'step': 4236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.460332', 'step': 4236, 'epoch': 2} {'type': 'loss', 'content': 0.0025783206801861525, 'timestamp': '2025-09-10 02:30:16.462424', 'step': 4237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.491788', 'step': 4237, 'epoch': 2} {'type': 'loss', 'content': 0.0003957781591452658, 'timestamp': '2025-09-10 02:30:16.493605', 'step': 4238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.522360', 'step': 4238, 'epoch': 2} {'type': 'loss', 'content': 0.0038249988574534655, 'timestamp': '2025-09-10 02:30:16.524716', 'step': 4239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:16.553918', 'step': 4239, 'epoch': 2} {'type': 'loss', 'content': 0.020384501665830612, 'timestamp': '2025-09-10 02:30:16.577388', 'step': 4240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:16.606976', 'step': 4240, 'epoch': 2} {'type': 'loss', 'content': 0.0016678055981174111, 'timestamp': '2025-09-10 02:30:16.609261', 'step': 4241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.638710', 'step': 4241, 'epoch': 2} {'type': 'loss', 'content': 0.0007309973007068038, 'timestamp': '2025-09-10 02:30:16.640683', 'step': 4242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.669705', 'step': 4242, 'epoch': 2} {'type': 'loss', 'content': 0.0005048222956247628, 'timestamp': '2025-09-10 02:30:16.671847', 'step': 4243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.700767', 'step': 4243, 'epoch': 2} {'type': 'loss', 'content': 0.03268107771873474, 'timestamp': '2025-09-10 02:30:16.724174', 'step': 4244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.753375', 'step': 4244, 'epoch': 2} {'type': 'loss', 'content': 0.0005758335464634001, 'timestamp': '2025-09-10 02:30:16.755270', 'step': 4245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.784658', 'step': 4245, 'epoch': 2} {'type': 'loss', 'content': 0.000780436210334301, 'timestamp': '2025-09-10 02:30:16.786610', 'step': 4246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.820105', 'step': 4246, 'epoch': 2} {'type': 'loss', 'content': 0.03583789989352226, 'timestamp': '2025-09-10 02:30:16.822404', 'step': 4247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.851476', 'step': 4247, 'epoch': 2} {'type': 'loss', 'content': 0.01857677660882473, 'timestamp': '2025-09-10 02:30:16.874865', 'step': 4248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.905171', 'step': 4248, 'epoch': 2} {'type': 'loss', 'content': 0.00383751024492085, 'timestamp': '2025-09-10 02:30:16.907683', 'step': 4249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.937595', 'step': 4249, 'epoch': 2} {'type': 'loss', 'content': 0.008934049867093563, 'timestamp': '2025-09-10 02:30:16.939900', 'step': 4250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:16.970167', 'step': 4250, 'epoch': 2} {'type': 'loss', 'content': 0.00574998976662755, 'timestamp': '2025-09-10 02:30:16.972182', 'step': 4251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:17.001387', 'step': 4251, 'epoch': 2} {'type': 'loss', 'content': 0.030393557623028755, 'timestamp': '2025-09-10 02:30:17.024740', 'step': 4252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:17.054302', 'step': 4252, 'epoch': 2} {'type': 'loss', 'content': 0.006191540509462357, 'timestamp': '2025-09-10 02:30:17.056165', 'step': 4253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:17.085071', 'step': 4253, 'epoch': 2} {'type': 'loss', 'content': 0.0160320233553648, 'timestamp': '2025-09-10 02:30:17.087045', 'step': 4254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:17.116130', 'step': 4254, 'epoch': 2} {'type': 'loss', 'content': 0.0027475282549858093, 'timestamp': '2025-09-10 02:30:17.118108', 'step': 4255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:17.146999', 'step': 4255, 'epoch': 2} {'type': 'loss', 'content': 0.00025519152404740453, 'timestamp': '2025-09-10 02:30:17.170621', 'step': 4256, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:30:19.209633', 'step': 4256, 'epoch': 2} {'type': 'pplx', 'content': 2666502.8418611083, 'timestamp': '2025-09-10 02:30:19.214203', 'step': 4256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.242527', 'step': 4256, 'epoch': 2} {'type': 'loss', 'content': 0.004566500429064035, 'timestamp': '2025-09-10 02:30:19.245068', 'step': 4257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.275303', 'step': 4257, 'epoch': 2} {'type': 'loss', 'content': 0.0001808918605092913, 'timestamp': '2025-09-10 02:30:19.277146', 'step': 4258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.305919', 'step': 4258, 'epoch': 2} {'type': 'loss', 'content': 0.0013139968505129218, 'timestamp': '2025-09-10 02:30:19.310650', 'step': 4259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:19.345186', 'step': 4259, 'epoch': 2} {'type': 'loss', 'content': 0.005284997168928385, 'timestamp': '2025-09-10 02:30:19.368955', 'step': 4260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:19.400220', 'step': 4260, 'epoch': 2} {'type': 'loss', 'content': 0.005157732404768467, 'timestamp': '2025-09-10 02:30:19.402305', 'step': 4261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.431676', 'step': 4261, 'epoch': 2} {'type': 'loss', 'content': 0.004276688676327467, 'timestamp': '2025-09-10 02:30:19.433711', 'step': 4262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.463536', 'step': 4262, 'epoch': 2} {'type': 'loss', 'content': 0.006970508955419064, 'timestamp': '2025-09-10 02:30:19.467108', 'step': 4263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.496949', 'step': 4263, 'epoch': 2} {'type': 'loss', 'content': 0.0032287253998219967, 'timestamp': '2025-09-10 02:30:19.527243', 'step': 4264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.557984', 'step': 4264, 'epoch': 2} {'type': 'loss', 'content': 0.006718222517520189, 'timestamp': '2025-09-10 02:30:19.559824', 'step': 4265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.588446', 'step': 4265, 'epoch': 2} {'type': 'loss', 'content': 0.05021723359823227, 'timestamp': '2025-09-10 02:30:19.592381', 'step': 4266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.622417', 'step': 4266, 'epoch': 2} {'type': 'loss', 'content': 0.014577167108654976, 'timestamp': '2025-09-10 02:30:19.624560', 'step': 4267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.653604', 'step': 4267, 'epoch': 2} {'type': 'loss', 'content': 0.006113509181886911, 'timestamp': '2025-09-10 02:30:19.677278', 'step': 4268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.706570', 'step': 4268, 'epoch': 2} {'type': 'loss', 'content': 0.025011321529746056, 'timestamp': '2025-09-10 02:30:19.709098', 'step': 4269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.741519', 'step': 4269, 'epoch': 2} {'type': 'loss', 'content': 0.0031119210179895163, 'timestamp': '2025-09-10 02:30:19.744035', 'step': 4270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.781893', 'step': 4270, 'epoch': 2} {'type': 'loss', 'content': 0.0006158011383377016, 'timestamp': '2025-09-10 02:30:19.784069', 'step': 4271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.813432', 'step': 4271, 'epoch': 2} {'type': 'loss', 'content': 0.00767070846632123, 'timestamp': '2025-09-10 02:30:19.837905', 'step': 4272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.875696', 'step': 4272, 'epoch': 2} {'type': 'loss', 'content': 0.005195465870201588, 'timestamp': '2025-09-10 02:30:19.877980', 'step': 4273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.907036', 'step': 4273, 'epoch': 2} {'type': 'loss', 'content': 0.021231412887573242, 'timestamp': '2025-09-10 02:30:19.909450', 'step': 4274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:19.938870', 'step': 4274, 'epoch': 2} {'type': 'loss', 'content': 0.017993109300732613, 'timestamp': '2025-09-10 02:30:19.941365', 'step': 4275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:19.977389', 'step': 4275, 'epoch': 2} {'type': 'loss', 'content': 0.0028719205874949694, 'timestamp': '2025-09-10 02:30:20.000980', 'step': 4276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.030909', 'step': 4276, 'epoch': 2} {'type': 'loss', 'content': 0.0008749695844016969, 'timestamp': '2025-09-10 02:30:20.032815', 'step': 4277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.061689', 'step': 4277, 'epoch': 2} {'type': 'loss', 'content': 0.05861704796552658, 'timestamp': '2025-09-10 02:30:20.063723', 'step': 4278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:20.094393', 'step': 4278, 'epoch': 2} {'type': 'loss', 'content': 0.0005885810824111104, 'timestamp': '2025-09-10 02:30:20.096630', 'step': 4279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.125499', 'step': 4279, 'epoch': 2} {'type': 'loss', 'content': 0.009852551855146885, 'timestamp': '2025-09-10 02:30:20.150320', 'step': 4280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.185539', 'step': 4280, 'epoch': 2} {'type': 'loss', 'content': 0.0010334537364542484, 'timestamp': '2025-09-10 02:30:20.187441', 'step': 4281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.219319', 'step': 4281, 'epoch': 2} {'type': 'loss', 'content': 0.0005859578959643841, 'timestamp': '2025-09-10 02:30:20.221209', 'step': 4282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.250078', 'step': 4282, 'epoch': 2} {'type': 'loss', 'content': 0.01634986512362957, 'timestamp': '2025-09-10 02:30:20.252211', 'step': 4283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.284667', 'step': 4283, 'epoch': 2} {'type': 'loss', 'content': 0.002268632873892784, 'timestamp': '2025-09-10 02:30:20.308215', 'step': 4284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:20.339507', 'step': 4284, 'epoch': 2} {'type': 'loss', 'content': 0.0009845413733273745, 'timestamp': '2025-09-10 02:30:20.341649', 'step': 4285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:20.372886', 'step': 4285, 'epoch': 2} {'type': 'loss', 'content': 0.00032462459057569504, 'timestamp': '2025-09-10 02:30:20.374794', 'step': 4286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.405344', 'step': 4286, 'epoch': 2} {'type': 'loss', 'content': 0.021166039630770683, 'timestamp': '2025-09-10 02:30:20.407618', 'step': 4287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.440660', 'step': 4287, 'epoch': 2} {'type': 'loss', 'content': 0.0007713089580647647, 'timestamp': '2025-09-10 02:30:20.464792', 'step': 4288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.493892', 'step': 4288, 'epoch': 2} {'type': 'loss', 'content': 0.00042154494440183043, 'timestamp': '2025-09-10 02:30:20.495786', 'step': 4289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.525203', 'step': 4289, 'epoch': 2} {'type': 'loss', 'content': 0.004018072970211506, 'timestamp': '2025-09-10 02:30:20.527338', 'step': 4290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.556973', 'step': 4290, 'epoch': 2} {'type': 'loss', 'content': 0.00023954005155246705, 'timestamp': '2025-09-10 02:30:20.559004', 'step': 4291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.588276', 'step': 4291, 'epoch': 2} {'type': 'loss', 'content': 0.009805425070226192, 'timestamp': '2025-09-10 02:30:20.612048', 'step': 4292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.641664', 'step': 4292, 'epoch': 2} {'type': 'loss', 'content': 0.004415757488459349, 'timestamp': '2025-09-10 02:30:20.645800', 'step': 4293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.677481', 'step': 4293, 'epoch': 2} {'type': 'loss', 'content': 0.0014099564868956804, 'timestamp': '2025-09-10 02:30:20.680527', 'step': 4294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.709383', 'step': 4294, 'epoch': 2} {'type': 'loss', 'content': 0.009648052044212818, 'timestamp': '2025-09-10 02:30:20.712168', 'step': 4295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.741594', 'step': 4295, 'epoch': 2} {'type': 'loss', 'content': 0.0007489831768907607, 'timestamp': '2025-09-10 02:30:20.768093', 'step': 4296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.796886', 'step': 4296, 'epoch': 2} {'type': 'loss', 'content': 0.016056550666689873, 'timestamp': '2025-09-10 02:30:20.799109', 'step': 4297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.828407', 'step': 4297, 'epoch': 2} {'type': 'loss', 'content': 0.0002837378706317395, 'timestamp': '2025-09-10 02:30:20.830522', 'step': 4298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.862806', 'step': 4298, 'epoch': 2} {'type': 'loss', 'content': 0.017477823421359062, 'timestamp': '2025-09-10 02:30:20.865409', 'step': 4299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.894602', 'step': 4299, 'epoch': 2} {'type': 'loss', 'content': 0.0015934448456391692, 'timestamp': '2025-09-10 02:30:20.919782', 'step': 4300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.951413', 'step': 4300, 'epoch': 2} {'type': 'loss', 'content': 0.02498321607708931, 'timestamp': '2025-09-10 02:30:20.954282', 'step': 4301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:20.986608', 'step': 4301, 'epoch': 2} {'type': 'loss', 'content': 0.0045504337176680565, 'timestamp': '2025-09-10 02:30:20.988576', 'step': 4302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.017450', 'step': 4302, 'epoch': 2} {'type': 'loss', 'content': 0.0029134657233953476, 'timestamp': '2025-09-10 02:30:21.019562', 'step': 4303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.054943', 'step': 4303, 'epoch': 2} {'type': 'loss', 'content': 0.006049790419638157, 'timestamp': '2025-09-10 02:30:21.079094', 'step': 4304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.109704', 'step': 4304, 'epoch': 2} {'type': 'loss', 'content': 0.005422768648713827, 'timestamp': '2025-09-10 02:30:21.111587', 'step': 4305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.140923', 'step': 4305, 'epoch': 2} {'type': 'loss', 'content': 0.019810587167739868, 'timestamp': '2025-09-10 02:30:21.143115', 'step': 4306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.172567', 'step': 4306, 'epoch': 2} {'type': 'loss', 'content': 0.00017135367670562118, 'timestamp': '2025-09-10 02:30:21.178457', 'step': 4307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:21.207927', 'step': 4307, 'epoch': 2} {'type': 'loss', 'content': 0.01794002763926983, 'timestamp': '2025-09-10 02:30:21.231693', 'step': 4308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.261949', 'step': 4308, 'epoch': 2} {'type': 'loss', 'content': 0.0010666154557839036, 'timestamp': '2025-09-10 02:30:21.264591', 'step': 4309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.295011', 'step': 4309, 'epoch': 2} {'type': 'loss', 'content': 0.009481683373451233, 'timestamp': '2025-09-10 02:30:21.297113', 'step': 4310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:21.326376', 'step': 4310, 'epoch': 2} {'type': 'loss', 'content': 0.004097465891391039, 'timestamp': '2025-09-10 02:30:21.329351', 'step': 4311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.358503', 'step': 4311, 'epoch': 2} {'type': 'loss', 'content': 0.004019964952021837, 'timestamp': '2025-09-10 02:30:21.381963', 'step': 4312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.411944', 'step': 4312, 'epoch': 2} {'type': 'loss', 'content': 0.000150880150613375, 'timestamp': '2025-09-10 02:30:21.414087', 'step': 4313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.443170', 'step': 4313, 'epoch': 2} {'type': 'loss', 'content': 0.00041646347381174564, 'timestamp': '2025-09-10 02:30:21.445282', 'step': 4314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:21.473991', 'step': 4314, 'epoch': 2} {'type': 'loss', 'content': 0.0008233272237703204, 'timestamp': '2025-09-10 02:30:21.476199', 'step': 4315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.505209', 'step': 4315, 'epoch': 2} {'type': 'loss', 'content': 0.004062996245920658, 'timestamp': '2025-09-10 02:30:21.528556', 'step': 4316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.560246', 'step': 4316, 'epoch': 2} {'type': 'loss', 'content': 0.030979419127106667, 'timestamp': '2025-09-10 02:30:21.564195', 'step': 4317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.593114', 'step': 4317, 'epoch': 2} {'type': 'loss', 'content': 0.00021808150631841272, 'timestamp': '2025-09-10 02:30:21.595284', 'step': 4318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.624563', 'step': 4318, 'epoch': 2} {'type': 'loss', 'content': 0.0003848371852654964, 'timestamp': '2025-09-10 02:30:21.626975', 'step': 4319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.662543', 'step': 4319, 'epoch': 2} {'type': 'loss', 'content': 0.005721138324588537, 'timestamp': '2025-09-10 02:30:21.685839', 'step': 4320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.717253', 'step': 4320, 'epoch': 2} {'type': 'loss', 'content': 0.018453901633620262, 'timestamp': '2025-09-10 02:30:21.719565', 'step': 4321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.749004', 'step': 4321, 'epoch': 2} {'type': 'loss', 'content': 0.0017978992545977235, 'timestamp': '2025-09-10 02:30:21.753362', 'step': 4322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.783429', 'step': 4322, 'epoch': 2} {'type': 'loss', 'content': 0.038310449570417404, 'timestamp': '2025-09-10 02:30:21.791944', 'step': 4323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:21.823634', 'step': 4323, 'epoch': 2} {'type': 'loss', 'content': 0.009124137461185455, 'timestamp': '2025-09-10 02:30:21.847332', 'step': 4324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.877476', 'step': 4324, 'epoch': 2} {'type': 'loss', 'content': 0.016482409089803696, 'timestamp': '2025-09-10 02:30:21.879859', 'step': 4325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.909288', 'step': 4325, 'epoch': 2} {'type': 'loss', 'content': 0.0018404892180114985, 'timestamp': '2025-09-10 02:30:21.912990', 'step': 4326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:21.943616', 'step': 4326, 'epoch': 2} {'type': 'loss', 'content': 0.01833636499941349, 'timestamp': '2025-09-10 02:30:21.945728', 'step': 4327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:21.975186', 'step': 4327, 'epoch': 2} {'type': 'loss', 'content': 0.008851002901792526, 'timestamp': '2025-09-10 02:30:21.998697', 'step': 4328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.027828', 'step': 4328, 'epoch': 2} {'type': 'loss', 'content': 0.003933702129870653, 'timestamp': '2025-09-10 02:30:22.032762', 'step': 4329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.065524', 'step': 4329, 'epoch': 2} {'type': 'loss', 'content': 0.04179712384939194, 'timestamp': '2025-09-10 02:30:22.068944', 'step': 4330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.097699', 'step': 4330, 'epoch': 2} {'type': 'loss', 'content': 0.00019713399524334818, 'timestamp': '2025-09-10 02:30:22.099843', 'step': 4331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.128888', 'step': 4331, 'epoch': 2} {'type': 'loss', 'content': 0.002089699497446418, 'timestamp': '2025-09-10 02:30:22.154230', 'step': 4332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.183176', 'step': 4332, 'epoch': 2} {'type': 'loss', 'content': 0.005560423247516155, 'timestamp': '2025-09-10 02:30:22.185334', 'step': 4333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.215316', 'step': 4333, 'epoch': 2} {'type': 'loss', 'content': 0.0005679992027580738, 'timestamp': '2025-09-10 02:30:22.217848', 'step': 4334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:22.249359', 'step': 4334, 'epoch': 2} {'type': 'loss', 'content': 0.004473458975553513, 'timestamp': '2025-09-10 02:30:22.251589', 'step': 4335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.281518', 'step': 4335, 'epoch': 2} {'type': 'loss', 'content': 0.0069739497266709805, 'timestamp': '2025-09-10 02:30:22.305350', 'step': 4336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.335020', 'step': 4336, 'epoch': 2} {'type': 'loss', 'content': 0.0070192874409258366, 'timestamp': '2025-09-10 02:30:22.337826', 'step': 4337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.366271', 'step': 4337, 'epoch': 2} {'type': 'loss', 'content': 0.0038986883591860533, 'timestamp': '2025-09-10 02:30:22.384092', 'step': 4338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.415490', 'step': 4338, 'epoch': 2} {'type': 'loss', 'content': 0.012104692868888378, 'timestamp': '2025-09-10 02:30:22.419742', 'step': 4339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:22.453813', 'step': 4339, 'epoch': 2} {'type': 'loss', 'content': 0.030272994190454483, 'timestamp': '2025-09-10 02:30:22.477229', 'step': 4340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.509914', 'step': 4340, 'epoch': 2} {'type': 'loss', 'content': 0.002849965589120984, 'timestamp': '2025-09-10 02:30:22.511765', 'step': 4341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:22.540016', 'step': 4341, 'epoch': 2} {'type': 'loss', 'content': 0.009622319601476192, 'timestamp': '2025-09-10 02:30:22.542150', 'step': 4342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.572690', 'step': 4342, 'epoch': 2} {'type': 'loss', 'content': 0.0004333446267992258, 'timestamp': '2025-09-10 02:30:22.574843', 'step': 4343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.630401', 'step': 4343, 'epoch': 2} {'type': 'loss', 'content': 0.0009035724797286093, 'timestamp': '2025-09-10 02:30:22.653933', 'step': 4344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:22.684107', 'step': 4344, 'epoch': 2} {'type': 'loss', 'content': 0.003374104155227542, 'timestamp': '2025-09-10 02:30:22.686961', 'step': 4345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:22.716053', 'step': 4345, 'epoch': 2} {'type': 'loss', 'content': 0.04673586040735245, 'timestamp': '2025-09-10 02:30:22.719010', 'step': 4346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.749352', 'step': 4346, 'epoch': 2} {'type': 'loss', 'content': 0.05751964822411537, 'timestamp': '2025-09-10 02:30:22.752578', 'step': 4347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.783129', 'step': 4347, 'epoch': 2} {'type': 'loss', 'content': 0.0013447256060317159, 'timestamp': '2025-09-10 02:30:22.806782', 'step': 4348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.837528', 'step': 4348, 'epoch': 2} {'type': 'loss', 'content': 0.016711339354515076, 'timestamp': '2025-09-10 02:30:22.839666', 'step': 4349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.871115', 'step': 4349, 'epoch': 2} {'type': 'loss', 'content': 0.0014064701972529292, 'timestamp': '2025-09-10 02:30:22.873325', 'step': 4350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.906807', 'step': 4350, 'epoch': 2} {'type': 'loss', 'content': 0.0013334174873307347, 'timestamp': '2025-09-10 02:30:22.908840', 'step': 4351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:22.938415', 'step': 4351, 'epoch': 2} {'type': 'loss', 'content': 0.0467769019305706, 'timestamp': '2025-09-10 02:30:22.965037', 'step': 4352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.005987', 'step': 4352, 'epoch': 2} {'type': 'loss', 'content': 0.0005183253088034689, 'timestamp': '2025-09-10 02:30:23.008119', 'step': 4353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:23.037685', 'step': 4353, 'epoch': 2} {'type': 'loss', 'content': 0.0013273023068904877, 'timestamp': '2025-09-10 02:30:23.039665', 'step': 4354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.070285', 'step': 4354, 'epoch': 2} {'type': 'loss', 'content': 0.00047609535977244377, 'timestamp': '2025-09-10 02:30:23.073785', 'step': 4355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.105178', 'step': 4355, 'epoch': 2} {'type': 'loss', 'content': 0.011602640151977539, 'timestamp': '2025-09-10 02:30:23.130111', 'step': 4356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.160621', 'step': 4356, 'epoch': 2} {'type': 'loss', 'content': 0.01510648149996996, 'timestamp': '2025-09-10 02:30:23.162606', 'step': 4357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:23.190891', 'step': 4357, 'epoch': 2} {'type': 'loss', 'content': 0.0038530982565134764, 'timestamp': '2025-09-10 02:30:23.192880', 'step': 4358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.221886', 'step': 4358, 'epoch': 2} {'type': 'loss', 'content': 0.003956594504415989, 'timestamp': '2025-09-10 02:30:23.224020', 'step': 4359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.252428', 'step': 4359, 'epoch': 2} {'type': 'loss', 'content': 0.006590021308511496, 'timestamp': '2025-09-10 02:30:23.276409', 'step': 4360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.305598', 'step': 4360, 'epoch': 2} {'type': 'loss', 'content': 0.015391605906188488, 'timestamp': '2025-09-10 02:30:23.308032', 'step': 4361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.337262', 'step': 4361, 'epoch': 2} {'type': 'loss', 'content': 0.0010404250351712108, 'timestamp': '2025-09-10 02:30:23.339418', 'step': 4362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.370395', 'step': 4362, 'epoch': 2} {'type': 'loss', 'content': 0.0009703827672637999, 'timestamp': '2025-09-10 02:30:23.372249', 'step': 4363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.402168', 'step': 4363, 'epoch': 2} {'type': 'loss', 'content': 0.030420567840337753, 'timestamp': '2025-09-10 02:30:23.426540', 'step': 4364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:23.456881', 'step': 4364, 'epoch': 2} {'type': 'loss', 'content': 0.027547702193260193, 'timestamp': '2025-09-10 02:30:23.459276', 'step': 4365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.488358', 'step': 4365, 'epoch': 2} {'type': 'loss', 'content': 0.04904058575630188, 'timestamp': '2025-09-10 02:30:23.490640', 'step': 4366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.520113', 'step': 4366, 'epoch': 2} {'type': 'loss', 'content': 0.0006052871467545629, 'timestamp': '2025-09-10 02:30:23.522319', 'step': 4367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.552152', 'step': 4367, 'epoch': 2} {'type': 'loss', 'content': 0.002918403595685959, 'timestamp': '2025-09-10 02:30:23.576007', 'step': 4368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.607395', 'step': 4368, 'epoch': 2} {'type': 'loss', 'content': 0.0007114603067748249, 'timestamp': '2025-09-10 02:30:23.609435', 'step': 4369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.639204', 'step': 4369, 'epoch': 2} {'type': 'loss', 'content': 0.0027193163987249136, 'timestamp': '2025-09-10 02:30:23.641485', 'step': 4370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.672831', 'step': 4370, 'epoch': 2} {'type': 'loss', 'content': 0.00400652876123786, 'timestamp': '2025-09-10 02:30:23.674952', 'step': 4371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.706334', 'step': 4371, 'epoch': 2} {'type': 'loss', 'content': 0.03902292251586914, 'timestamp': '2025-09-10 02:30:23.730285', 'step': 4372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.771811', 'step': 4372, 'epoch': 2} {'type': 'loss', 'content': 0.002066724468022585, 'timestamp': '2025-09-10 02:30:23.773573', 'step': 4373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.803950', 'step': 4373, 'epoch': 2} {'type': 'loss', 'content': 0.011587983928620815, 'timestamp': '2025-09-10 02:30:23.806137', 'step': 4374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.835604', 'step': 4374, 'epoch': 2} {'type': 'loss', 'content': 0.033676352351903915, 'timestamp': '2025-09-10 02:30:23.840345', 'step': 4375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.870719', 'step': 4375, 'epoch': 2} {'type': 'loss', 'content': 0.01650477945804596, 'timestamp': '2025-09-10 02:30:23.894287', 'step': 4376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:23.923570', 'step': 4376, 'epoch': 2} {'type': 'loss', 'content': 0.0012162269558757544, 'timestamp': '2025-09-10 02:30:23.925507', 'step': 4377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:23.965931', 'step': 4377, 'epoch': 2} {'type': 'loss', 'content': 0.04293219372630119, 'timestamp': '2025-09-10 02:30:23.967876', 'step': 4378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:23.996689', 'step': 4378, 'epoch': 2} {'type': 'loss', 'content': 0.01945372484624386, 'timestamp': '2025-09-10 02:30:23.998955', 'step': 4379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:24.028331', 'step': 4379, 'epoch': 2} {'type': 'loss', 'content': 0.0012687245616689324, 'timestamp': '2025-09-10 02:30:24.054584', 'step': 4380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:24.091721', 'step': 4380, 'epoch': 2} {'type': 'loss', 'content': 0.03918248787522316, 'timestamp': '2025-09-10 02:30:24.095187', 'step': 4381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:24.125416', 'step': 4381, 'epoch': 2} {'type': 'loss', 'content': 0.007243589963763952, 'timestamp': '2025-09-10 02:30:24.127564', 'step': 4382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.156871', 'step': 4382, 'epoch': 2} {'type': 'loss', 'content': 0.00038471867446787655, 'timestamp': '2025-09-10 02:30:24.159070', 'step': 4383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.188233', 'step': 4383, 'epoch': 2} {'type': 'loss', 'content': 0.006663180887699127, 'timestamp': '2025-09-10 02:30:24.212548', 'step': 4384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:24.242870', 'step': 4384, 'epoch': 2} {'type': 'loss', 'content': 0.005792879965156317, 'timestamp': '2025-09-10 02:30:24.245016', 'step': 4385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.274190', 'step': 4385, 'epoch': 2} {'type': 'loss', 'content': 0.016859596595168114, 'timestamp': '2025-09-10 02:30:24.276207', 'step': 4386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:24.305400', 'step': 4386, 'epoch': 2} {'type': 'loss', 'content': 0.020246658474206924, 'timestamp': '2025-09-10 02:30:24.307193', 'step': 4387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:24.336345', 'step': 4387, 'epoch': 2} {'type': 'loss', 'content': 0.01099295262247324, 'timestamp': '2025-09-10 02:30:24.360108', 'step': 4388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.389583', 'step': 4388, 'epoch': 2} {'type': 'loss', 'content': 0.0008660271996632218, 'timestamp': '2025-09-10 02:30:24.391765', 'step': 4389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.421318', 'step': 4389, 'epoch': 2} {'type': 'loss', 'content': 0.026865771040320396, 'timestamp': '2025-09-10 02:30:24.423407', 'step': 4390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:24.452669', 'step': 4390, 'epoch': 2} {'type': 'loss', 'content': 0.05440434068441391, 'timestamp': '2025-09-10 02:30:24.456462', 'step': 4391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.487706', 'step': 4391, 'epoch': 2} {'type': 'loss', 'content': 0.04076675325632095, 'timestamp': '2025-09-10 02:30:24.511080', 'step': 4392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:24.540355', 'step': 4392, 'epoch': 2} {'type': 'loss', 'content': 0.012472431175410748, 'timestamp': '2025-09-10 02:30:24.542405', 'step': 4393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.571483', 'step': 4393, 'epoch': 2} {'type': 'loss', 'content': 0.015914931893348694, 'timestamp': '2025-09-10 02:30:24.575409', 'step': 4394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:24.608575', 'step': 4394, 'epoch': 2} {'type': 'loss', 'content': 0.0016718072583898902, 'timestamp': '2025-09-10 02:30:24.613324', 'step': 4395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.642871', 'step': 4395, 'epoch': 2} {'type': 'loss', 'content': 0.0273450817912817, 'timestamp': '2025-09-10 02:30:24.670084', 'step': 4396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.705022', 'step': 4396, 'epoch': 2} {'type': 'loss', 'content': 0.02937299944460392, 'timestamp': '2025-09-10 02:30:24.707243', 'step': 4397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.736701', 'step': 4397, 'epoch': 2} {'type': 'loss', 'content': 0.018649373203516006, 'timestamp': '2025-09-10 02:30:24.739354', 'step': 4398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.769007', 'step': 4398, 'epoch': 2} {'type': 'loss', 'content': 0.0013912491267547011, 'timestamp': '2025-09-10 02:30:24.770836', 'step': 4399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:24.799806', 'step': 4399, 'epoch': 2} {'type': 'loss', 'content': 0.007849449291825294, 'timestamp': '2025-09-10 02:30:24.824993', 'step': 4400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.854540', 'step': 4400, 'epoch': 2} {'type': 'loss', 'content': 0.00767852645367384, 'timestamp': '2025-09-10 02:30:24.856739', 'step': 4401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.887985', 'step': 4401, 'epoch': 2} {'type': 'loss', 'content': 0.018767986446619034, 'timestamp': '2025-09-10 02:30:24.889897', 'step': 4402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.918697', 'step': 4402, 'epoch': 2} {'type': 'loss', 'content': 0.02546873316168785, 'timestamp': '2025-09-10 02:30:24.920829', 'step': 4403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:24.949982', 'step': 4403, 'epoch': 2} {'type': 'loss', 'content': 0.007949613966047764, 'timestamp': '2025-09-10 02:30:24.973518', 'step': 4404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:25.002946', 'step': 4404, 'epoch': 2} {'type': 'loss', 'content': 0.010820974595844746, 'timestamp': '2025-09-10 02:30:25.006193', 'step': 4405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:25.040733', 'step': 4405, 'epoch': 2} {'type': 'loss', 'content': 0.013274082913994789, 'timestamp': '2025-09-10 02:30:25.042839', 'step': 4406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:25.074017', 'step': 4406, 'epoch': 2} {'type': 'loss', 'content': 0.029004592448472977, 'timestamp': '2025-09-10 02:30:25.075920', 'step': 4407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:25.104443', 'step': 4407, 'epoch': 2} {'type': 'loss', 'content': 0.04145583137869835, 'timestamp': '2025-09-10 02:30:25.128303', 'step': 4408, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:30:27.121558', 'step': 4408, 'epoch': 2} {'type': 'pplx', 'content': 2366995.1777753704, 'timestamp': '2025-09-10 02:30:27.123556', 'step': 4408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.151440', 'step': 4408, 'epoch': 2} {'type': 'loss', 'content': 0.007996621541678905, 'timestamp': '2025-09-10 02:30:27.153505', 'step': 4409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.183492', 'step': 4409, 'epoch': 2} {'type': 'loss', 'content': 0.01478519756346941, 'timestamp': '2025-09-10 02:30:27.185595', 'step': 4410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.215118', 'step': 4410, 'epoch': 2} {'type': 'loss', 'content': 0.016848096624016762, 'timestamp': '2025-09-10 02:30:27.217141', 'step': 4411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.246691', 'step': 4411, 'epoch': 2} {'type': 'loss', 'content': 0.08180829137563705, 'timestamp': '2025-09-10 02:30:27.270446', 'step': 4412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:27.299913', 'step': 4412, 'epoch': 2} {'type': 'loss', 'content': 0.014533833600580692, 'timestamp': '2025-09-10 02:30:27.301939', 'step': 4413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.330877', 'step': 4413, 'epoch': 2} {'type': 'loss', 'content': 0.012547485530376434, 'timestamp': '2025-09-10 02:30:27.333010', 'step': 4414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.362564', 'step': 4414, 'epoch': 2} {'type': 'loss', 'content': 0.03431883081793785, 'timestamp': '2025-09-10 02:30:27.364594', 'step': 4415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.393735', 'step': 4415, 'epoch': 2} {'type': 'loss', 'content': 0.001154187018983066, 'timestamp': '2025-09-10 02:30:27.417466', 'step': 4416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:27.446568', 'step': 4416, 'epoch': 2} {'type': 'loss', 'content': 0.047597140073776245, 'timestamp': '2025-09-10 02:30:27.448783', 'step': 4417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:27.477889', 'step': 4417, 'epoch': 2} {'type': 'loss', 'content': 0.024733979254961014, 'timestamp': '2025-09-10 02:30:27.479782', 'step': 4418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.508554', 'step': 4418, 'epoch': 2} {'type': 'loss', 'content': 0.013838161714375019, 'timestamp': '2025-09-10 02:30:27.510667', 'step': 4419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.539714', 'step': 4419, 'epoch': 2} {'type': 'loss', 'content': 0.005546521861106157, 'timestamp': '2025-09-10 02:30:27.563379', 'step': 4420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.592702', 'step': 4420, 'epoch': 2} {'type': 'loss', 'content': 0.008822837844491005, 'timestamp': '2025-09-10 02:30:27.594546', 'step': 4421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.623827', 'step': 4421, 'epoch': 2} {'type': 'loss', 'content': 0.04214005544781685, 'timestamp': '2025-09-10 02:30:27.626120', 'step': 4422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.655457', 'step': 4422, 'epoch': 2} {'type': 'loss', 'content': 0.02608082816004753, 'timestamp': '2025-09-10 02:30:27.657492', 'step': 4423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.687543', 'step': 4423, 'epoch': 2} {'type': 'loss', 'content': 0.03527413681149483, 'timestamp': '2025-09-10 02:30:27.710937', 'step': 4424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.740125', 'step': 4424, 'epoch': 2} {'type': 'loss', 'content': 0.009323501959443092, 'timestamp': '2025-09-10 02:30:27.742325', 'step': 4425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.771628', 'step': 4425, 'epoch': 2} {'type': 'loss', 'content': 0.007076173089444637, 'timestamp': '2025-09-10 02:30:27.775053', 'step': 4426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:27.804117', 'step': 4426, 'epoch': 2} {'type': 'loss', 'content': 0.014898846857249737, 'timestamp': '2025-09-10 02:30:27.805960', 'step': 4427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.835122', 'step': 4427, 'epoch': 2} {'type': 'loss', 'content': 0.0012923700269311666, 'timestamp': '2025-09-10 02:30:27.858521', 'step': 4428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.888869', 'step': 4428, 'epoch': 2} {'type': 'loss', 'content': 0.0009464005706831813, 'timestamp': '2025-09-10 02:30:27.890954', 'step': 4429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.920388', 'step': 4429, 'epoch': 2} {'type': 'loss', 'content': 0.03467162325978279, 'timestamp': '2025-09-10 02:30:27.922292', 'step': 4430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.952217', 'step': 4430, 'epoch': 2} {'type': 'loss', 'content': 0.0009034690447151661, 'timestamp': '2025-09-10 02:30:27.954488', 'step': 4431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:27.984392', 'step': 4431, 'epoch': 2} {'type': 'loss', 'content': 0.004393530543893576, 'timestamp': '2025-09-10 02:30:28.007999', 'step': 4432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.038094', 'step': 4432, 'epoch': 2} {'type': 'loss', 'content': 0.020813841372728348, 'timestamp': '2025-09-10 02:30:28.039780', 'step': 4433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:28.068526', 'step': 4433, 'epoch': 2} {'type': 'loss', 'content': 0.0029184853192418814, 'timestamp': '2025-09-10 02:30:28.070686', 'step': 4434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.099692', 'step': 4434, 'epoch': 2} {'type': 'loss', 'content': 0.018276767805218697, 'timestamp': '2025-09-10 02:30:28.101828', 'step': 4435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.130932', 'step': 4435, 'epoch': 2} {'type': 'loss', 'content': 0.013618439435958862, 'timestamp': '2025-09-10 02:30:28.154633', 'step': 4436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.184012', 'step': 4436, 'epoch': 2} {'type': 'loss', 'content': 0.01371348462998867, 'timestamp': '2025-09-10 02:30:28.185963', 'step': 4437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:28.215496', 'step': 4437, 'epoch': 2} {'type': 'loss', 'content': 0.010511423461139202, 'timestamp': '2025-09-10 02:30:28.217561', 'step': 4438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.247095', 'step': 4438, 'epoch': 2} {'type': 'loss', 'content': 0.016875606030225754, 'timestamp': '2025-09-10 02:30:28.249478', 'step': 4439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.278798', 'step': 4439, 'epoch': 2} {'type': 'loss', 'content': 0.037644706666469574, 'timestamp': '2025-09-10 02:30:28.302399', 'step': 4440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:28.332021', 'step': 4440, 'epoch': 2} {'type': 'loss', 'content': 0.014547375962138176, 'timestamp': '2025-09-10 02:30:28.334181', 'step': 4441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.363933', 'step': 4441, 'epoch': 2} {'type': 'loss', 'content': 0.02772408537566662, 'timestamp': '2025-09-10 02:30:28.365908', 'step': 4442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:28.395389', 'step': 4442, 'epoch': 2} {'type': 'loss', 'content': 0.00911302026361227, 'timestamp': '2025-09-10 02:30:28.397521', 'step': 4443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.426455', 'step': 4443, 'epoch': 2} {'type': 'loss', 'content': 0.01764538884162903, 'timestamp': '2025-09-10 02:30:28.450155', 'step': 4444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.479872', 'step': 4444, 'epoch': 2} {'type': 'loss', 'content': 0.013143041171133518, 'timestamp': '2025-09-10 02:30:28.481596', 'step': 4445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:28.510649', 'step': 4445, 'epoch': 2} {'type': 'loss', 'content': 0.025145303457975388, 'timestamp': '2025-09-10 02:30:28.512901', 'step': 4446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:28.542010', 'step': 4446, 'epoch': 2} {'type': 'loss', 'content': 0.0038895097095519304, 'timestamp': '2025-09-10 02:30:28.544178', 'step': 4447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.574420', 'step': 4447, 'epoch': 2} {'type': 'loss', 'content': 0.005414228420704603, 'timestamp': '2025-09-10 02:30:28.598255', 'step': 4448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:28.627847', 'step': 4448, 'epoch': 2} {'type': 'loss', 'content': 0.008694284595549107, 'timestamp': '2025-09-10 02:30:28.629922', 'step': 4449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:28.658963', 'step': 4449, 'epoch': 2} {'type': 'loss', 'content': 0.015848910436034203, 'timestamp': '2025-09-10 02:30:28.661076', 'step': 4450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.690141', 'step': 4450, 'epoch': 2} {'type': 'loss', 'content': 0.0003486702044028789, 'timestamp': '2025-09-10 02:30:28.692275', 'step': 4451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.721573', 'step': 4451, 'epoch': 2} {'type': 'loss', 'content': 0.006905894260853529, 'timestamp': '2025-09-10 02:30:28.745163', 'step': 4452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.774920', 'step': 4452, 'epoch': 2} {'type': 'loss', 'content': 0.0024056038819253445, 'timestamp': '2025-09-10 02:30:28.777049', 'step': 4453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.806242', 'step': 4453, 'epoch': 2} {'type': 'loss', 'content': 0.0023369495756924152, 'timestamp': '2025-09-10 02:30:28.808317', 'step': 4454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.837443', 'step': 4454, 'epoch': 2} {'type': 'loss', 'content': 0.006998989265412092, 'timestamp': '2025-09-10 02:30:28.839539', 'step': 4455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.868436', 'step': 4455, 'epoch': 2} {'type': 'loss', 'content': 0.010632582008838654, 'timestamp': '2025-09-10 02:30:28.891870', 'step': 4456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.920993', 'step': 4456, 'epoch': 2} {'type': 'loss', 'content': 0.004489853512495756, 'timestamp': '2025-09-10 02:30:28.922815', 'step': 4457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.951863', 'step': 4457, 'epoch': 2} {'type': 'loss', 'content': 0.010355109348893166, 'timestamp': '2025-09-10 02:30:28.953999', 'step': 4458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:28.983310', 'step': 4458, 'epoch': 2} {'type': 'loss', 'content': 0.010508539155125618, 'timestamp': '2025-09-10 02:30:28.985297', 'step': 4459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.015560', 'step': 4459, 'epoch': 2} {'type': 'loss', 'content': 0.010677381418645382, 'timestamp': '2025-09-10 02:30:29.039389', 'step': 4460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.068827', 'step': 4460, 'epoch': 2} {'type': 'loss', 'content': 0.0026452091988176107, 'timestamp': '2025-09-10 02:30:29.071106', 'step': 4461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.101273', 'step': 4461, 'epoch': 2} {'type': 'loss', 'content': 0.0003370894701220095, 'timestamp': '2025-09-10 02:30:29.103329', 'step': 4462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.132808', 'step': 4462, 'epoch': 2} {'type': 'loss', 'content': 0.005529838614165783, 'timestamp': '2025-09-10 02:30:29.134900', 'step': 4463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.164009', 'step': 4463, 'epoch': 2} {'type': 'loss', 'content': 0.009987021796405315, 'timestamp': '2025-09-10 02:30:29.187720', 'step': 4464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.216988', 'step': 4464, 'epoch': 2} {'type': 'loss', 'content': 0.029712708666920662, 'timestamp': '2025-09-10 02:30:29.219119', 'step': 4465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.248201', 'step': 4465, 'epoch': 2} {'type': 'loss', 'content': 0.0027898624539375305, 'timestamp': '2025-09-10 02:30:29.250252', 'step': 4466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.279434', 'step': 4466, 'epoch': 2} {'type': 'loss', 'content': 0.03034038282930851, 'timestamp': '2025-09-10 02:30:29.281386', 'step': 4467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:29.310856', 'step': 4467, 'epoch': 2} {'type': 'loss', 'content': 0.019655631855130196, 'timestamp': '2025-09-10 02:30:29.334421', 'step': 4468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.364090', 'step': 4468, 'epoch': 2} {'type': 'loss', 'content': 0.007132671773433685, 'timestamp': '2025-09-10 02:30:29.366049', 'step': 4469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.395033', 'step': 4469, 'epoch': 2} {'type': 'loss', 'content': 0.008172462694346905, 'timestamp': '2025-09-10 02:30:29.397137', 'step': 4470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.426163', 'step': 4470, 'epoch': 2} {'type': 'loss', 'content': 0.010540196672081947, 'timestamp': '2025-09-10 02:30:29.428029', 'step': 4471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.458043', 'step': 4471, 'epoch': 2} {'type': 'loss', 'content': 0.019368048757314682, 'timestamp': '2025-09-10 02:30:29.481897', 'step': 4472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.511612', 'step': 4472, 'epoch': 2} {'type': 'loss', 'content': 0.0010160019155591726, 'timestamp': '2025-09-10 02:30:29.513703', 'step': 4473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:29.543244', 'step': 4473, 'epoch': 2} {'type': 'loss', 'content': 0.0021432091016322374, 'timestamp': '2025-09-10 02:30:29.545374', 'step': 4474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.575057', 'step': 4474, 'epoch': 2} {'type': 'loss', 'content': 0.0005147705087438226, 'timestamp': '2025-09-10 02:30:29.577302', 'step': 4475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.606609', 'step': 4475, 'epoch': 2} {'type': 'loss', 'content': 0.0011651624226942658, 'timestamp': '2025-09-10 02:30:29.630465', 'step': 4476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:29.661370', 'step': 4476, 'epoch': 2} {'type': 'loss', 'content': 0.038272712379693985, 'timestamp': '2025-09-10 02:30:29.663418', 'step': 4477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.693789', 'step': 4477, 'epoch': 2} {'type': 'loss', 'content': 0.01615484617650509, 'timestamp': '2025-09-10 02:30:29.695889', 'step': 4478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.725712', 'step': 4478, 'epoch': 2} {'type': 'loss', 'content': 0.07328778505325317, 'timestamp': '2025-09-10 02:30:29.727805', 'step': 4479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:29.757694', 'step': 4479, 'epoch': 2} {'type': 'loss', 'content': 0.00021645925880875438, 'timestamp': '2025-09-10 02:30:29.781335', 'step': 4480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.811439', 'step': 4480, 'epoch': 2} {'type': 'loss', 'content': 0.0031695568468421698, 'timestamp': '2025-09-10 02:30:29.813536', 'step': 4481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.842905', 'step': 4481, 'epoch': 2} {'type': 'loss', 'content': 0.005539781413972378, 'timestamp': '2025-09-10 02:30:29.845036', 'step': 4482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.873992', 'step': 4482, 'epoch': 2} {'type': 'loss', 'content': 0.00024167521041817963, 'timestamp': '2025-09-10 02:30:29.875968', 'step': 4483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.905330', 'step': 4483, 'epoch': 2} {'type': 'loss', 'content': 0.007639687974005938, 'timestamp': '2025-09-10 02:30:29.929051', 'step': 4484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.958031', 'step': 4484, 'epoch': 2} {'type': 'loss', 'content': 0.01753472536802292, 'timestamp': '2025-09-10 02:30:29.960094', 'step': 4485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:29.989210', 'step': 4485, 'epoch': 2} {'type': 'loss', 'content': 0.0038788598030805588, 'timestamp': '2025-09-10 02:30:29.991038', 'step': 4486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:30.020193', 'step': 4486, 'epoch': 2} {'type': 'loss', 'content': 0.012585505843162537, 'timestamp': '2025-09-10 02:30:30.022320', 'step': 4487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:30.051566', 'step': 4487, 'epoch': 2} {'type': 'loss', 'content': 0.0010783494217321277, 'timestamp': '2025-09-10 02:30:30.075099', 'step': 4488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:30.105465', 'step': 4488, 'epoch': 2} {'type': 'loss', 'content': 0.0025533498264849186, 'timestamp': '2025-09-10 02:30:30.107544', 'step': 4489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:30.137397', 'step': 4489, 'epoch': 2} {'type': 'loss', 'content': 0.01380517240613699, 'timestamp': '2025-09-10 02:30:30.139623', 'step': 4490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:30.170262', 'step': 4490, 'epoch': 2} {'type': 'loss', 'content': 0.0004282824811525643, 'timestamp': '2025-09-10 02:30:30.172093', 'step': 4491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:30.201297', 'step': 4491, 'epoch': 2} {'type': 'loss', 'content': 0.00017427995044272393, 'timestamp': '2025-09-10 02:30:30.225037', 'step': 4492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:30.254467', 'step': 4492, 'epoch': 2} {'type': 'loss', 'content': 0.011240904219448566, 'timestamp': '2025-09-10 02:30:30.256545', 'step': 4493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:30.285132', 'step': 4493, 'epoch': 2} {'type': 'loss', 'content': 0.002480054972693324, 'timestamp': '2025-09-10 02:30:30.286814', 'step': 4494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:30.315857', 'step': 4494, 'epoch': 2} {'type': 'loss', 'content': 0.000373142451280728, 'timestamp': '2025-09-10 02:30:30.317778', 'step': 4495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:30.347201', 'step': 4495, 'epoch': 2} {'type': 'loss', 'content': 0.00369776482693851, 'timestamp': '2025-09-10 02:30:30.370586', 'step': 4496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:30.402492', 'step': 4496, 'epoch': 2} {'type': 'loss', 'content': 0.0031046585645526648, 'timestamp': '2025-09-10 02:30:30.404344', 'step': 4497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:30.432921', 'step': 4497, 'epoch': 2} {'type': 'loss', 'content': 0.0007371109095402062, 'timestamp': '2025-09-10 02:30:30.434975', 'step': 4498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:30.463916', 'step': 4498, 'epoch': 2} {'type': 'loss', 'content': 0.0029690249357372522, 'timestamp': '2025-09-10 02:30:30.465804', 'step': 4499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:30.494440', 'step': 4499, 'epoch': 2} {'type': 'loss', 'content': 0.032537732273340225, 'timestamp': '2025-09-10 02:30:30.517796', 'step': 4500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-10 02:30:34.944486', 'step': 4500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:34.982290', 'step': 4500, 'epoch': 2} {'type': 'loss', 'content': 0.00028259860118851066, 'timestamp': '2025-09-10 02:30:34.984380', 'step': 4501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.014468', 'step': 4501, 'epoch': 2} {'type': 'loss', 'content': 0.006036944221705198, 'timestamp': '2025-09-10 02:30:35.016298', 'step': 4502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.045894', 'step': 4502, 'epoch': 2} {'type': 'loss', 'content': 0.00026221523876301944, 'timestamp': '2025-09-10 02:30:35.047980', 'step': 4503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.077275', 'step': 4503, 'epoch': 2} {'type': 'loss', 'content': 0.0023497173096984625, 'timestamp': '2025-09-10 02:30:35.100901', 'step': 4504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.130356', 'step': 4504, 'epoch': 2} {'type': 'loss', 'content': 0.04136881232261658, 'timestamp': '2025-09-10 02:30:35.132147', 'step': 4505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:35.161923', 'step': 4505, 'epoch': 2} {'type': 'loss', 'content': 0.008326425217092037, 'timestamp': '2025-09-10 02:30:35.163712', 'step': 4506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.192416', 'step': 4506, 'epoch': 2} {'type': 'loss', 'content': 0.0009407126344740391, 'timestamp': '2025-09-10 02:30:35.194137', 'step': 4507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.222615', 'step': 4507, 'epoch': 2} {'type': 'loss', 'content': 0.011470125056803226, 'timestamp': '2025-09-10 02:30:35.246364', 'step': 4508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.275353', 'step': 4508, 'epoch': 2} {'type': 'loss', 'content': 0.045114580541849136, 'timestamp': '2025-09-10 02:30:35.277229', 'step': 4509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.306439', 'step': 4509, 'epoch': 2} {'type': 'loss', 'content': 0.016461007297039032, 'timestamp': '2025-09-10 02:30:35.308324', 'step': 4510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.337288', 'step': 4510, 'epoch': 2} {'type': 'loss', 'content': 0.0015779578825458884, 'timestamp': '2025-09-10 02:30:35.339222', 'step': 4511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.368805', 'step': 4511, 'epoch': 2} {'type': 'loss', 'content': 0.0010573251638561487, 'timestamp': '2025-09-10 02:30:35.392149', 'step': 4512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.424045', 'step': 4512, 'epoch': 2} {'type': 'loss', 'content': 0.0010005880612879992, 'timestamp': '2025-09-10 02:30:35.425842', 'step': 4513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.454496', 'step': 4513, 'epoch': 2} {'type': 'loss', 'content': 0.009261549450457096, 'timestamp': '2025-09-10 02:30:35.456502', 'step': 4514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.485348', 'step': 4514, 'epoch': 2} {'type': 'loss', 'content': 0.0003881935845129192, 'timestamp': '2025-09-10 02:30:35.488300', 'step': 4515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:35.517534', 'step': 4515, 'epoch': 2} {'type': 'loss', 'content': 0.0477876141667366, 'timestamp': '2025-09-10 02:30:35.540842', 'step': 4516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.569696', 'step': 4516, 'epoch': 2} {'type': 'loss', 'content': 0.0003711617609951645, 'timestamp': '2025-09-10 02:30:35.571793', 'step': 4517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.600905', 'step': 4517, 'epoch': 2} {'type': 'loss', 'content': 0.0011772002326324582, 'timestamp': '2025-09-10 02:30:35.602901', 'step': 4518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.631702', 'step': 4518, 'epoch': 2} {'type': 'loss', 'content': 0.0020203415770083666, 'timestamp': '2025-09-10 02:30:35.633678', 'step': 4519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.662660', 'step': 4519, 'epoch': 2} {'type': 'loss', 'content': 0.0011317833559587598, 'timestamp': '2025-09-10 02:30:35.686088', 'step': 4520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.715203', 'step': 4520, 'epoch': 2} {'type': 'loss', 'content': 0.0004131353634875268, 'timestamp': '2025-09-10 02:30:35.717090', 'step': 4521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.746432', 'step': 4521, 'epoch': 2} {'type': 'loss', 'content': 0.005883991252630949, 'timestamp': '2025-09-10 02:30:35.748380', 'step': 4522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.777539', 'step': 4522, 'epoch': 2} {'type': 'loss', 'content': 0.005447355564683676, 'timestamp': '2025-09-10 02:30:35.779478', 'step': 4523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:35.808483', 'step': 4523, 'epoch': 2} {'type': 'loss', 'content': 0.0002769411075860262, 'timestamp': '2025-09-10 02:30:35.831945', 'step': 4524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:35.860608', 'step': 4524, 'epoch': 2} {'type': 'loss', 'content': 0.03765971213579178, 'timestamp': '2025-09-10 02:30:35.862600', 'step': 4525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.891611', 'step': 4525, 'epoch': 2} {'type': 'loss', 'content': 0.003222037572413683, 'timestamp': '2025-09-10 02:30:35.893552', 'step': 4526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.922583', 'step': 4526, 'epoch': 2} {'type': 'loss', 'content': 0.0010269946651533246, 'timestamp': '2025-09-10 02:30:35.924455', 'step': 4527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:35.953620', 'step': 4527, 'epoch': 2} {'type': 'loss', 'content': 0.0013073551235720515, 'timestamp': '2025-09-10 02:30:35.976876', 'step': 4528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.005621', 'step': 4528, 'epoch': 2} {'type': 'loss', 'content': 0.003010762622579932, 'timestamp': '2025-09-10 02:30:36.007492', 'step': 4529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.036212', 'step': 4529, 'epoch': 2} {'type': 'loss', 'content': 0.0005810009897686541, 'timestamp': '2025-09-10 02:30:36.038268', 'step': 4530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.067224', 'step': 4530, 'epoch': 2} {'type': 'loss', 'content': 0.023207888007164, 'timestamp': '2025-09-10 02:30:36.069042', 'step': 4531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.097839', 'step': 4531, 'epoch': 2} {'type': 'loss', 'content': 0.0025188697036355734, 'timestamp': '2025-09-10 02:30:36.121398', 'step': 4532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.150619', 'step': 4532, 'epoch': 2} {'type': 'loss', 'content': 0.00013451059930957854, 'timestamp': '2025-09-10 02:30:36.152661', 'step': 4533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.181801', 'step': 4533, 'epoch': 2} {'type': 'loss', 'content': 0.0369986928999424, 'timestamp': '2025-09-10 02:30:36.183835', 'step': 4534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.213081', 'step': 4534, 'epoch': 2} {'type': 'loss', 'content': 0.0017421423690393567, 'timestamp': '2025-09-10 02:30:36.215043', 'step': 4535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.244310', 'step': 4535, 'epoch': 2} {'type': 'loss', 'content': 0.001130439923144877, 'timestamp': '2025-09-10 02:30:36.267772', 'step': 4536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.297053', 'step': 4536, 'epoch': 2} {'type': 'loss', 'content': 0.005186374299228191, 'timestamp': '2025-09-10 02:30:36.298900', 'step': 4537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.327623', 'step': 4537, 'epoch': 2} {'type': 'loss', 'content': 0.0008350748685188591, 'timestamp': '2025-09-10 02:30:36.329455', 'step': 4538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.358526', 'step': 4538, 'epoch': 2} {'type': 'loss', 'content': 0.0476171113550663, 'timestamp': '2025-09-10 02:30:36.360330', 'step': 4539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.389620', 'step': 4539, 'epoch': 2} {'type': 'loss', 'content': 0.00242114020511508, 'timestamp': '2025-09-10 02:30:36.412863', 'step': 4540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.441691', 'step': 4540, 'epoch': 2} {'type': 'loss', 'content': 0.0016092261066660285, 'timestamp': '2025-09-10 02:30:36.443888', 'step': 4541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.475386', 'step': 4541, 'epoch': 2} {'type': 'loss', 'content': 0.0004083625681232661, 'timestamp': '2025-09-10 02:30:36.477282', 'step': 4542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.506802', 'step': 4542, 'epoch': 2} {'type': 'loss', 'content': 0.00012146379594923928, 'timestamp': '2025-09-10 02:30:36.508782', 'step': 4543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:36.537619', 'step': 4543, 'epoch': 2} {'type': 'loss', 'content': 0.0010169557062909007, 'timestamp': '2025-09-10 02:30:36.561091', 'step': 4544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.590239', 'step': 4544, 'epoch': 2} {'type': 'loss', 'content': 0.004768866579979658, 'timestamp': '2025-09-10 02:30:36.592197', 'step': 4545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.621579', 'step': 4545, 'epoch': 2} {'type': 'loss', 'content': 0.04669610410928726, 'timestamp': '2025-09-10 02:30:36.623469', 'step': 4546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.652562', 'step': 4546, 'epoch': 2} {'type': 'loss', 'content': 0.00036743137752637267, 'timestamp': '2025-09-10 02:30:36.654545', 'step': 4547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.683313', 'step': 4547, 'epoch': 2} {'type': 'loss', 'content': 0.014560094103217125, 'timestamp': '2025-09-10 02:30:36.706857', 'step': 4548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:36.735591', 'step': 4548, 'epoch': 2} {'type': 'loss', 'content': 0.0003129991819150746, 'timestamp': '2025-09-10 02:30:36.737521', 'step': 4549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.766446', 'step': 4549, 'epoch': 2} {'type': 'loss', 'content': 0.0013748781057074666, 'timestamp': '2025-09-10 02:30:36.768240', 'step': 4550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.797325', 'step': 4550, 'epoch': 2} {'type': 'loss', 'content': 0.004956469871103764, 'timestamp': '2025-09-10 02:30:36.799382', 'step': 4551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.828415', 'step': 4551, 'epoch': 2} {'type': 'loss', 'content': 0.036850493401288986, 'timestamp': '2025-09-10 02:30:36.851979', 'step': 4552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.881034', 'step': 4552, 'epoch': 2} {'type': 'loss', 'content': 0.00027258231421001256, 'timestamp': '2025-09-10 02:30:36.882853', 'step': 4553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:36.911579', 'step': 4553, 'epoch': 2} {'type': 'loss', 'content': 0.00446205073967576, 'timestamp': '2025-09-10 02:30:36.913659', 'step': 4554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.942456', 'step': 4554, 'epoch': 2} {'type': 'loss', 'content': 0.004611088894307613, 'timestamp': '2025-09-10 02:30:36.944398', 'step': 4555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:36.973478', 'step': 4555, 'epoch': 2} {'type': 'loss', 'content': 0.029512211680412292, 'timestamp': '2025-09-10 02:30:36.996744', 'step': 4556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:37.025574', 'step': 4556, 'epoch': 2} {'type': 'loss', 'content': 0.007085477467626333, 'timestamp': '2025-09-10 02:30:37.027557', 'step': 4557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.056316', 'step': 4557, 'epoch': 2} {'type': 'loss', 'content': 0.00022227386943995953, 'timestamp': '2025-09-10 02:30:37.058258', 'step': 4558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.087116', 'step': 4558, 'epoch': 2} {'type': 'loss', 'content': 0.02596711739897728, 'timestamp': '2025-09-10 02:30:37.089037', 'step': 4559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:37.117462', 'step': 4559, 'epoch': 2} {'type': 'loss', 'content': 0.0008985972381196916, 'timestamp': '2025-09-10 02:30:37.140940', 'step': 4560, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:30:39.092939', 'step': 4560, 'epoch': 2} {'type': 'pplx', 'content': 2204857.4238991113, 'timestamp': '2025-09-10 02:30:39.094819', 'step': 4560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.122636', 'step': 4560, 'epoch': 2} {'type': 'loss', 'content': 0.0112817557528615, 'timestamp': '2025-09-10 02:30:39.124427', 'step': 4561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.160945', 'step': 4561, 'epoch': 2} {'type': 'loss', 'content': 0.00012598246394190937, 'timestamp': '2025-09-10 02:30:39.169426', 'step': 4562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.203089', 'step': 4562, 'epoch': 2} {'type': 'loss', 'content': 0.00012869889906141907, 'timestamp': '2025-09-10 02:30:39.205047', 'step': 4563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.235693', 'step': 4563, 'epoch': 2} {'type': 'loss', 'content': 0.00023604150919709355, 'timestamp': '2025-09-10 02:30:39.259181', 'step': 4564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:39.288542', 'step': 4564, 'epoch': 2} {'type': 'loss', 'content': 0.034922044724226, 'timestamp': '2025-09-10 02:30:39.290428', 'step': 4565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.322610', 'step': 4565, 'epoch': 2} {'type': 'loss', 'content': 0.0004843877977691591, 'timestamp': '2025-09-10 02:30:39.324425', 'step': 4566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.353012', 'step': 4566, 'epoch': 2} {'type': 'loss', 'content': 0.0006594950682483613, 'timestamp': '2025-09-10 02:30:39.354933', 'step': 4567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:39.384341', 'step': 4567, 'epoch': 2} {'type': 'loss', 'content': 0.00014926897711120546, 'timestamp': '2025-09-10 02:30:39.407748', 'step': 4568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.436628', 'step': 4568, 'epoch': 2} {'type': 'loss', 'content': 0.01768476329743862, 'timestamp': '2025-09-10 02:30:39.438450', 'step': 4569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:39.468403', 'step': 4569, 'epoch': 2} {'type': 'loss', 'content': 0.0005520475679077208, 'timestamp': '2025-09-10 02:30:39.470545', 'step': 4570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.500803', 'step': 4570, 'epoch': 2} {'type': 'loss', 'content': 0.005577580071985722, 'timestamp': '2025-09-10 02:30:39.503562', 'step': 4571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:39.532390', 'step': 4571, 'epoch': 2} {'type': 'loss', 'content': 0.00513619976118207, 'timestamp': '2025-09-10 02:30:39.555672', 'step': 4572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.585529', 'step': 4572, 'epoch': 2} {'type': 'loss', 'content': 0.00020523127750493586, 'timestamp': '2025-09-10 02:30:39.589882', 'step': 4573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.619568', 'step': 4573, 'epoch': 2} {'type': 'loss', 'content': 0.00042392921750433743, 'timestamp': '2025-09-10 02:30:39.621407', 'step': 4574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.658937', 'step': 4574, 'epoch': 2} {'type': 'loss', 'content': 0.007960820570588112, 'timestamp': '2025-09-10 02:30:39.661977', 'step': 4575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.691137', 'step': 4575, 'epoch': 2} {'type': 'loss', 'content': 0.009319993667304516, 'timestamp': '2025-09-10 02:30:39.714631', 'step': 4576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.743675', 'step': 4576, 'epoch': 2} {'type': 'loss', 'content': 0.00033753132447600365, 'timestamp': '2025-09-10 02:30:39.745492', 'step': 4577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.775294', 'step': 4577, 'epoch': 2} {'type': 'loss', 'content': 0.002440785523504019, 'timestamp': '2025-09-10 02:30:39.777906', 'step': 4578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:39.807977', 'step': 4578, 'epoch': 2} {'type': 'loss', 'content': 0.001942989183589816, 'timestamp': '2025-09-10 02:30:39.809938', 'step': 4579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.839331', 'step': 4579, 'epoch': 2} {'type': 'loss', 'content': 0.0011816952610388398, 'timestamp': '2025-09-10 02:30:39.862656', 'step': 4580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.892184', 'step': 4580, 'epoch': 2} {'type': 'loss', 'content': 0.0007158272783271968, 'timestamp': '2025-09-10 02:30:39.893926', 'step': 4581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.923074', 'step': 4581, 'epoch': 2} {'type': 'loss', 'content': 0.0004863930225837976, 'timestamp': '2025-09-10 02:30:39.924984', 'step': 4582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.954078', 'step': 4582, 'epoch': 2} {'type': 'loss', 'content': 0.007595433853566647, 'timestamp': '2025-09-10 02:30:39.955951', 'step': 4583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:39.984577', 'step': 4583, 'epoch': 2} {'type': 'loss', 'content': 0.009131629951298237, 'timestamp': '2025-09-10 02:30:40.008371', 'step': 4584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.037480', 'step': 4584, 'epoch': 2} {'type': 'loss', 'content': 0.009902708232402802, 'timestamp': '2025-09-10 02:30:40.039426', 'step': 4585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.068420', 'step': 4585, 'epoch': 2} {'type': 'loss', 'content': 0.004625441040843725, 'timestamp': '2025-09-10 02:30:40.070286', 'step': 4586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:40.099322', 'step': 4586, 'epoch': 2} {'type': 'loss', 'content': 0.022473273798823357, 'timestamp': '2025-09-10 02:30:40.101943', 'step': 4587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.130627', 'step': 4587, 'epoch': 2} {'type': 'loss', 'content': 0.0013099823845550418, 'timestamp': '2025-09-10 02:30:40.154071', 'step': 4588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.183884', 'step': 4588, 'epoch': 2} {'type': 'loss', 'content': 0.0021664605010300875, 'timestamp': '2025-09-10 02:30:40.185604', 'step': 4589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.214050', 'step': 4589, 'epoch': 2} {'type': 'loss', 'content': 0.011343925260007381, 'timestamp': '2025-09-10 02:30:40.216500', 'step': 4590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.245623', 'step': 4590, 'epoch': 2} {'type': 'loss', 'content': 0.0015838586259633303, 'timestamp': '2025-09-10 02:30:40.247464', 'step': 4591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.276027', 'step': 4591, 'epoch': 2} {'type': 'loss', 'content': 0.02695656009018421, 'timestamp': '2025-09-10 02:30:40.299275', 'step': 4592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.331332', 'step': 4592, 'epoch': 2} {'type': 'loss', 'content': 0.00487537682056427, 'timestamp': '2025-09-10 02:30:40.333187', 'step': 4593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.361985', 'step': 4593, 'epoch': 2} {'type': 'loss', 'content': 0.0014115578960627317, 'timestamp': '2025-09-10 02:30:40.363985', 'step': 4594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.395100', 'step': 4594, 'epoch': 2} {'type': 'loss', 'content': 0.015568017028272152, 'timestamp': '2025-09-10 02:30:40.397595', 'step': 4595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.426632', 'step': 4595, 'epoch': 2} {'type': 'loss', 'content': 0.00027882744325324893, 'timestamp': '2025-09-10 02:30:40.450028', 'step': 4596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.478916', 'step': 4596, 'epoch': 2} {'type': 'loss', 'content': 0.0005282434285618365, 'timestamp': '2025-09-10 02:30:40.480836', 'step': 4597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.509540', 'step': 4597, 'epoch': 2} {'type': 'loss', 'content': 0.00030529152718372643, 'timestamp': '2025-09-10 02:30:40.512780', 'step': 4598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.557635', 'step': 4598, 'epoch': 2} {'type': 'loss', 'content': 0.015784697607159615, 'timestamp': '2025-09-10 02:30:40.559781', 'step': 4599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.593319', 'step': 4599, 'epoch': 2} {'type': 'loss', 'content': 7.095612090779468e-05, 'timestamp': '2025-09-10 02:30:40.617932', 'step': 4600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.649154', 'step': 4600, 'epoch': 2} {'type': 'loss', 'content': 0.01217495184391737, 'timestamp': '2025-09-10 02:30:40.651516', 'step': 4601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.679953', 'step': 4601, 'epoch': 2} {'type': 'loss', 'content': 0.0007999838562682271, 'timestamp': '2025-09-10 02:30:40.682424', 'step': 4602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.711149', 'step': 4602, 'epoch': 2} {'type': 'loss', 'content': 0.006470394786447287, 'timestamp': '2025-09-10 02:30:40.720660', 'step': 4603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.755854', 'step': 4603, 'epoch': 2} {'type': 'loss', 'content': 0.001985550858080387, 'timestamp': '2025-09-10 02:30:40.779835', 'step': 4604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.808505', 'step': 4604, 'epoch': 2} {'type': 'loss', 'content': 0.004694047849625349, 'timestamp': '2025-09-10 02:30:40.811014', 'step': 4605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.840819', 'step': 4605, 'epoch': 2} {'type': 'loss', 'content': 0.0005729757831431925, 'timestamp': '2025-09-10 02:30:40.842966', 'step': 4606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.871370', 'step': 4606, 'epoch': 2} {'type': 'loss', 'content': 0.00021095320698805153, 'timestamp': '2025-09-10 02:30:40.873318', 'step': 4607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.902635', 'step': 4607, 'epoch': 2} {'type': 'loss', 'content': 0.006257088389247656, 'timestamp': '2025-09-10 02:30:40.926884', 'step': 4608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:40.957850', 'step': 4608, 'epoch': 2} {'type': 'loss', 'content': 0.0003300842654425651, 'timestamp': '2025-09-10 02:30:40.960007', 'step': 4609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:40.992161', 'step': 4609, 'epoch': 2} {'type': 'loss', 'content': 0.0005720595945604146, 'timestamp': '2025-09-10 02:30:40.997406', 'step': 4610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.026477', 'step': 4610, 'epoch': 2} {'type': 'loss', 'content': 0.012436196208000183, 'timestamp': '2025-09-10 02:30:41.028400', 'step': 4611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.057607', 'step': 4611, 'epoch': 2} {'type': 'loss', 'content': 0.00013509248674381524, 'timestamp': '2025-09-10 02:30:41.080877', 'step': 4612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.109707', 'step': 4612, 'epoch': 2} {'type': 'loss', 'content': 0.0005333507433533669, 'timestamp': '2025-09-10 02:30:41.111523', 'step': 4613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.140906', 'step': 4613, 'epoch': 2} {'type': 'loss', 'content': 0.00045343072270043194, 'timestamp': '2025-09-10 02:30:41.142735', 'step': 4614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.171626', 'step': 4614, 'epoch': 2} {'type': 'loss', 'content': 0.0035763964988291264, 'timestamp': '2025-09-10 02:30:41.173663', 'step': 4615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.203325', 'step': 4615, 'epoch': 2} {'type': 'loss', 'content': 0.0002577145060058683, 'timestamp': '2025-09-10 02:30:41.226733', 'step': 4616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.258264', 'step': 4616, 'epoch': 2} {'type': 'loss', 'content': 0.0007295700488612056, 'timestamp': '2025-09-10 02:30:41.260287', 'step': 4617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.290409', 'step': 4617, 'epoch': 2} {'type': 'loss', 'content': 0.11661987006664276, 'timestamp': '2025-09-10 02:30:41.294082', 'step': 4618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.332683', 'step': 4618, 'epoch': 2} {'type': 'loss', 'content': 0.0005766888498328626, 'timestamp': '2025-09-10 02:30:41.336365', 'step': 4619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.371552', 'step': 4619, 'epoch': 2} {'type': 'loss', 'content': 0.0007064057281240821, 'timestamp': '2025-09-10 02:30:41.395439', 'step': 4620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.425081', 'step': 4620, 'epoch': 2} {'type': 'loss', 'content': 0.00012147352390456945, 'timestamp': '2025-09-10 02:30:41.427107', 'step': 4621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.455923', 'step': 4621, 'epoch': 2} {'type': 'loss', 'content': 0.001252091838978231, 'timestamp': '2025-09-10 02:30:41.458463', 'step': 4622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.487409', 'step': 4622, 'epoch': 2} {'type': 'loss', 'content': 0.005930963438004255, 'timestamp': '2025-09-10 02:30:41.489506', 'step': 4623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.519499', 'step': 4623, 'epoch': 2} {'type': 'loss', 'content': 0.0003072503604926169, 'timestamp': '2025-09-10 02:30:41.543542', 'step': 4624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.573982', 'step': 4624, 'epoch': 2} {'type': 'loss', 'content': 0.0008662088657729328, 'timestamp': '2025-09-10 02:30:41.575985', 'step': 4625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.604687', 'step': 4625, 'epoch': 2} {'type': 'loss', 'content': 0.00038694750401191413, 'timestamp': '2025-09-10 02:30:41.606784', 'step': 4626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.636491', 'step': 4626, 'epoch': 2} {'type': 'loss', 'content': 0.00027813235647045076, 'timestamp': '2025-09-10 02:30:41.638314', 'step': 4627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.668856', 'step': 4627, 'epoch': 2} {'type': 'loss', 'content': 0.005301487632095814, 'timestamp': '2025-09-10 02:30:41.692623', 'step': 4628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.726012', 'step': 4628, 'epoch': 2} {'type': 'loss', 'content': 0.0001333223917754367, 'timestamp': '2025-09-10 02:30:41.728121', 'step': 4629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.757633', 'step': 4629, 'epoch': 2} {'type': 'loss', 'content': 0.027996379882097244, 'timestamp': '2025-09-10 02:30:41.759479', 'step': 4630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.788366', 'step': 4630, 'epoch': 2} {'type': 'loss', 'content': 0.020284932106733322, 'timestamp': '2025-09-10 02:30:41.790453', 'step': 4631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.823358', 'step': 4631, 'epoch': 2} {'type': 'loss', 'content': 0.0066495900973677635, 'timestamp': '2025-09-10 02:30:41.847313', 'step': 4632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.881336', 'step': 4632, 'epoch': 2} {'type': 'loss', 'content': 0.0035037498455494642, 'timestamp': '2025-09-10 02:30:41.884140', 'step': 4633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:41.913636', 'step': 4633, 'epoch': 2} {'type': 'loss', 'content': 8.686402725288644e-05, 'timestamp': '2025-09-10 02:30:41.920886', 'step': 4634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.954650', 'step': 4634, 'epoch': 2} {'type': 'loss', 'content': 0.00020267089712433517, 'timestamp': '2025-09-10 02:30:41.956835', 'step': 4635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:41.989209', 'step': 4635, 'epoch': 2} {'type': 'loss', 'content': 0.00013673164357896894, 'timestamp': '2025-09-10 02:30:42.012563', 'step': 4636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.044167', 'step': 4636, 'epoch': 2} {'type': 'loss', 'content': 0.0030601483304053545, 'timestamp': '2025-09-10 02:30:42.045973', 'step': 4637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.075195', 'step': 4637, 'epoch': 2} {'type': 'loss', 'content': 0.0019239679677411914, 'timestamp': '2025-09-10 02:30:42.077883', 'step': 4638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.106617', 'step': 4638, 'epoch': 2} {'type': 'loss', 'content': 0.017276791855692863, 'timestamp': '2025-09-10 02:30:42.108523', 'step': 4639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:42.138563', 'step': 4639, 'epoch': 2} {'type': 'loss', 'content': 0.027837634086608887, 'timestamp': '2025-09-10 02:30:42.173814', 'step': 4640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.203399', 'step': 4640, 'epoch': 2} {'type': 'loss', 'content': 0.02407458983361721, 'timestamp': '2025-09-10 02:30:42.206145', 'step': 4641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.241768', 'step': 4641, 'epoch': 2} {'type': 'loss', 'content': 0.03673195466399193, 'timestamp': '2025-09-10 02:30:42.245392', 'step': 4642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.275472', 'step': 4642, 'epoch': 2} {'type': 'loss', 'content': 0.003787655383348465, 'timestamp': '2025-09-10 02:30:42.277433', 'step': 4643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:42.306500', 'step': 4643, 'epoch': 2} {'type': 'loss', 'content': 0.0010021141497418284, 'timestamp': '2025-09-10 02:30:42.329933', 'step': 4644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:42.361217', 'step': 4644, 'epoch': 2} {'type': 'loss', 'content': 0.02015187032520771, 'timestamp': '2025-09-10 02:30:42.365352', 'step': 4645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.395877', 'step': 4645, 'epoch': 2} {'type': 'loss', 'content': 0.04627053812146187, 'timestamp': '2025-09-10 02:30:42.397899', 'step': 4646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.426539', 'step': 4646, 'epoch': 2} {'type': 'loss', 'content': 0.013461175374686718, 'timestamp': '2025-09-10 02:30:42.428357', 'step': 4647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.456911', 'step': 4647, 'epoch': 2} {'type': 'loss', 'content': 0.0011725391959771514, 'timestamp': '2025-09-10 02:30:42.480557', 'step': 4648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.512694', 'step': 4648, 'epoch': 2} {'type': 'loss', 'content': 0.02038337104022503, 'timestamp': '2025-09-10 02:30:42.514237', 'step': 4649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.545868', 'step': 4649, 'epoch': 2} {'type': 'loss', 'content': 0.0018510989611968398, 'timestamp': '2025-09-10 02:30:42.547783', 'step': 4650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.579572', 'step': 4650, 'epoch': 2} {'type': 'loss', 'content': 0.0019442373886704445, 'timestamp': '2025-09-10 02:30:42.581364', 'step': 4651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.610080', 'step': 4651, 'epoch': 2} {'type': 'loss', 'content': 0.0031817667186260223, 'timestamp': '2025-09-10 02:30:42.633346', 'step': 4652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:42.663207', 'step': 4652, 'epoch': 2} {'type': 'loss', 'content': 0.00040753273060545325, 'timestamp': '2025-09-10 02:30:42.665012', 'step': 4653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.693702', 'step': 4653, 'epoch': 2} {'type': 'loss', 'content': 0.0002709409745875746, 'timestamp': '2025-09-10 02:30:42.695937', 'step': 4654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:42.725483', 'step': 4654, 'epoch': 2} {'type': 'loss', 'content': 0.0008405909757129848, 'timestamp': '2025-09-10 02:30:42.727579', 'step': 4655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:42.756437', 'step': 4655, 'epoch': 2} {'type': 'loss', 'content': 0.000137664086651057, 'timestamp': '2025-09-10 02:30:42.781404', 'step': 4656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.810949', 'step': 4656, 'epoch': 2} {'type': 'loss', 'content': 0.008599597029387951, 'timestamp': '2025-09-10 02:30:42.813001', 'step': 4657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.842445', 'step': 4657, 'epoch': 2} {'type': 'loss', 'content': 0.0001108621945604682, 'timestamp': '2025-09-10 02:30:42.844564', 'step': 4658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.876877', 'step': 4658, 'epoch': 2} {'type': 'loss', 'content': 0.0005707357777282596, 'timestamp': '2025-09-10 02:30:42.879001', 'step': 4659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.915123', 'step': 4659, 'epoch': 2} {'type': 'loss', 'content': 0.0015837346436455846, 'timestamp': '2025-09-10 02:30:42.939759', 'step': 4660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:42.969442', 'step': 4660, 'epoch': 2} {'type': 'loss', 'content': 0.0008016590145416558, 'timestamp': '2025-09-10 02:30:42.972439', 'step': 4661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.010638', 'step': 4661, 'epoch': 2} {'type': 'loss', 'content': 0.02274659276008606, 'timestamp': '2025-09-10 02:30:43.012657', 'step': 4662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.041706', 'step': 4662, 'epoch': 2} {'type': 'loss', 'content': 0.037858959287405014, 'timestamp': '2025-09-10 02:30:43.048490', 'step': 4663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.079621', 'step': 4663, 'epoch': 2} {'type': 'loss', 'content': 0.04054301232099533, 'timestamp': '2025-09-10 02:30:43.105035', 'step': 4664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.134391', 'step': 4664, 'epoch': 2} {'type': 'loss', 'content': 0.0031110390555113554, 'timestamp': '2025-09-10 02:30:43.136919', 'step': 4665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.167371', 'step': 4665, 'epoch': 2} {'type': 'loss', 'content': 0.0007724833558313549, 'timestamp': '2025-09-10 02:30:43.173496', 'step': 4666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.209853', 'step': 4666, 'epoch': 2} {'type': 'loss', 'content': 0.002394846873357892, 'timestamp': '2025-09-10 02:30:43.216483', 'step': 4667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.249223', 'step': 4667, 'epoch': 2} {'type': 'loss', 'content': 0.00028845228371210396, 'timestamp': '2025-09-10 02:30:43.273554', 'step': 4668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.302254', 'step': 4668, 'epoch': 2} {'type': 'loss', 'content': 0.04577751085162163, 'timestamp': '2025-09-10 02:30:43.306447', 'step': 4669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.337230', 'step': 4669, 'epoch': 2} {'type': 'loss', 'content': 0.02678990364074707, 'timestamp': '2025-09-10 02:30:43.339109', 'step': 4670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.367863', 'step': 4670, 'epoch': 2} {'type': 'loss', 'content': 0.011855545453727245, 'timestamp': '2025-09-10 02:30:43.370035', 'step': 4671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.398982', 'step': 4671, 'epoch': 2} {'type': 'loss', 'content': 0.04165295884013176, 'timestamp': '2025-09-10 02:30:43.422536', 'step': 4672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.451636', 'step': 4672, 'epoch': 2} {'type': 'loss', 'content': 0.017214450985193253, 'timestamp': '2025-09-10 02:30:43.453514', 'step': 4673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.487264', 'step': 4673, 'epoch': 2} {'type': 'loss', 'content': 0.009694449603557587, 'timestamp': '2025-09-10 02:30:43.491378', 'step': 4674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.522045', 'step': 4674, 'epoch': 2} {'type': 'loss', 'content': 0.006493962835520506, 'timestamp': '2025-09-10 02:30:43.525062', 'step': 4675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.557196', 'step': 4675, 'epoch': 2} {'type': 'loss', 'content': 0.0008258892921730876, 'timestamp': '2025-09-10 02:30:43.580796', 'step': 4676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.609692', 'step': 4676, 'epoch': 2} {'type': 'loss', 'content': 0.016906818374991417, 'timestamp': '2025-09-10 02:30:43.611806', 'step': 4677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.640809', 'step': 4677, 'epoch': 2} {'type': 'loss', 'content': 0.0019898030441254377, 'timestamp': '2025-09-10 02:30:43.643271', 'step': 4678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.673630', 'step': 4678, 'epoch': 2} {'type': 'loss', 'content': 0.0012549569364637136, 'timestamp': '2025-09-10 02:30:43.675650', 'step': 4679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.704425', 'step': 4679, 'epoch': 2} {'type': 'loss', 'content': 0.003011396387591958, 'timestamp': '2025-09-10 02:30:43.733089', 'step': 4680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.763147', 'step': 4680, 'epoch': 2} {'type': 'loss', 'content': 0.0006827022880315781, 'timestamp': '2025-09-10 02:30:43.765001', 'step': 4681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.793751', 'step': 4681, 'epoch': 2} {'type': 'loss', 'content': 0.019386066123843193, 'timestamp': '2025-09-10 02:30:43.796124', 'step': 4682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.824919', 'step': 4682, 'epoch': 2} {'type': 'loss', 'content': 0.07785572856664658, 'timestamp': '2025-09-10 02:30:43.826963', 'step': 4683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.855483', 'step': 4683, 'epoch': 2} {'type': 'loss', 'content': 0.003492309246212244, 'timestamp': '2025-09-10 02:30:43.879161', 'step': 4684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.909882', 'step': 4684, 'epoch': 2} {'type': 'loss', 'content': 0.005199705250561237, 'timestamp': '2025-09-10 02:30:43.911875', 'step': 4685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:43.943726', 'step': 4685, 'epoch': 2} {'type': 'loss', 'content': 0.0023987186141312122, 'timestamp': '2025-09-10 02:30:43.945783', 'step': 4686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:43.974628', 'step': 4686, 'epoch': 2} {'type': 'loss', 'content': 0.014172668568789959, 'timestamp': '2025-09-10 02:30:43.977686', 'step': 4687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.014211', 'step': 4687, 'epoch': 2} {'type': 'loss', 'content': 0.003468379145488143, 'timestamp': '2025-09-10 02:30:44.037738', 'step': 4688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.066953', 'step': 4688, 'epoch': 2} {'type': 'loss', 'content': 0.039519794285297394, 'timestamp': '2025-09-10 02:30:44.069521', 'step': 4689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.099205', 'step': 4689, 'epoch': 2} {'type': 'loss', 'content': 0.029918795451521873, 'timestamp': '2025-09-10 02:30:44.102615', 'step': 4690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.131355', 'step': 4690, 'epoch': 2} {'type': 'loss', 'content': 0.0024169038515537977, 'timestamp': '2025-09-10 02:30:44.133254', 'step': 4691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.161931', 'step': 4691, 'epoch': 2} {'type': 'loss', 'content': 0.025109851732850075, 'timestamp': '2025-09-10 02:30:44.189382', 'step': 4692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:44.221348', 'step': 4692, 'epoch': 2} {'type': 'loss', 'content': 0.005677707493305206, 'timestamp': '2025-09-10 02:30:44.225395', 'step': 4693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.255279', 'step': 4693, 'epoch': 2} {'type': 'loss', 'content': 0.027732862159609795, 'timestamp': '2025-09-10 02:30:44.257197', 'step': 4694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.285701', 'step': 4694, 'epoch': 2} {'type': 'loss', 'content': 0.005289082881063223, 'timestamp': '2025-09-10 02:30:44.288032', 'step': 4695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.316635', 'step': 4695, 'epoch': 2} {'type': 'loss', 'content': 0.006506029516458511, 'timestamp': '2025-09-10 02:30:44.340230', 'step': 4696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.373189', 'step': 4696, 'epoch': 2} {'type': 'loss', 'content': 0.0031115796882659197, 'timestamp': '2025-09-10 02:30:44.375106', 'step': 4697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.404596', 'step': 4697, 'epoch': 2} {'type': 'loss', 'content': 0.0010988533031195402, 'timestamp': '2025-09-10 02:30:44.406836', 'step': 4698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.436008', 'step': 4698, 'epoch': 2} {'type': 'loss', 'content': 0.01683671586215496, 'timestamp': '2025-09-10 02:30:44.437915', 'step': 4699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:44.467699', 'step': 4699, 'epoch': 2} {'type': 'loss', 'content': 0.02131526544690132, 'timestamp': '2025-09-10 02:30:44.491430', 'step': 4700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.522417', 'step': 4700, 'epoch': 2} {'type': 'loss', 'content': 0.008895104750990868, 'timestamp': '2025-09-10 02:30:44.532048', 'step': 4701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.564307', 'step': 4701, 'epoch': 2} {'type': 'loss', 'content': 0.03631191700696945, 'timestamp': '2025-09-10 02:30:44.566308', 'step': 4702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:44.595457', 'step': 4702, 'epoch': 2} {'type': 'loss', 'content': 0.023046817630529404, 'timestamp': '2025-09-10 02:30:44.599918', 'step': 4703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.629210', 'step': 4703, 'epoch': 2} {'type': 'loss', 'content': 0.002487179124727845, 'timestamp': '2025-09-10 02:30:44.653036', 'step': 4704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.682241', 'step': 4704, 'epoch': 2} {'type': 'loss', 'content': 0.008304216898977757, 'timestamp': '2025-09-10 02:30:44.684165', 'step': 4705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.712626', 'step': 4705, 'epoch': 2} {'type': 'loss', 'content': 0.03096388466656208, 'timestamp': '2025-09-10 02:30:44.714530', 'step': 4706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.743155', 'step': 4706, 'epoch': 2} {'type': 'loss', 'content': 0.00456048222258687, 'timestamp': '2025-09-10 02:30:44.745119', 'step': 4707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.774873', 'step': 4707, 'epoch': 2} {'type': 'loss', 'content': 0.022228265181183815, 'timestamp': '2025-09-10 02:30:44.798344', 'step': 4708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.827008', 'step': 4708, 'epoch': 2} {'type': 'loss', 'content': 0.005984528921544552, 'timestamp': '2025-09-10 02:30:44.828914', 'step': 4709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:44.863219', 'step': 4709, 'epoch': 2} {'type': 'loss', 'content': 0.0011961181880906224, 'timestamp': '2025-09-10 02:30:44.865556', 'step': 4710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:44.895578', 'step': 4710, 'epoch': 2} {'type': 'loss', 'content': 0.013456185348331928, 'timestamp': '2025-09-10 02:30:44.899777', 'step': 4711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:44.932157', 'step': 4711, 'epoch': 2} {'type': 'loss', 'content': 0.0036757667548954487, 'timestamp': '2025-09-10 02:30:44.956777', 'step': 4712, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:30:46.943632', 'step': 4712, 'epoch': 2} {'type': 'pplx', 'content': 2189411.935164041, 'timestamp': '2025-09-10 02:30:46.950644', 'step': 4712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:46.983917', 'step': 4712, 'epoch': 2} {'type': 'loss', 'content': 0.012935475446283817, 'timestamp': '2025-09-10 02:30:46.990285', 'step': 4713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.022140', 'step': 4713, 'epoch': 2} {'type': 'loss', 'content': 0.028412139043211937, 'timestamp': '2025-09-10 02:30:47.024790', 'step': 4714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.057810', 'step': 4714, 'epoch': 2} {'type': 'loss', 'content': 0.00245910813100636, 'timestamp': '2025-09-10 02:30:47.059944', 'step': 4715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.093278', 'step': 4715, 'epoch': 2} {'type': 'loss', 'content': 0.0012804417638108134, 'timestamp': '2025-09-10 02:30:47.116909', 'step': 4716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.146954', 'step': 4716, 'epoch': 2} {'type': 'loss', 'content': 0.014916189014911652, 'timestamp': '2025-09-10 02:30:47.149810', 'step': 4717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.179596', 'step': 4717, 'epoch': 2} {'type': 'loss', 'content': 0.027777446433901787, 'timestamp': '2025-09-10 02:30:47.181586', 'step': 4718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.212111', 'step': 4718, 'epoch': 2} {'type': 'loss', 'content': 0.0018172768177464604, 'timestamp': '2025-09-10 02:30:47.217945', 'step': 4719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.249552', 'step': 4719, 'epoch': 2} {'type': 'loss', 'content': 0.03047746792435646, 'timestamp': '2025-09-10 02:30:47.273245', 'step': 4720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.307706', 'step': 4720, 'epoch': 2} {'type': 'loss', 'content': 0.0038779808674007654, 'timestamp': '2025-09-10 02:30:47.311321', 'step': 4721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.342423', 'step': 4721, 'epoch': 2} {'type': 'loss', 'content': 0.009764066897332668, 'timestamp': '2025-09-10 02:30:47.344400', 'step': 4722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.375506', 'step': 4722, 'epoch': 2} {'type': 'loss', 'content': 0.005471152253448963, 'timestamp': '2025-09-10 02:30:47.378413', 'step': 4723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.415050', 'step': 4723, 'epoch': 2} {'type': 'loss', 'content': 0.0015373850474134088, 'timestamp': '2025-09-10 02:30:47.443267', 'step': 4724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:47.476622', 'step': 4724, 'epoch': 2} {'type': 'loss', 'content': 0.017080390825867653, 'timestamp': '2025-09-10 02:30:47.481587', 'step': 4725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:47.513468', 'step': 4725, 'epoch': 2} {'type': 'loss', 'content': 0.10140477120876312, 'timestamp': '2025-09-10 02:30:47.517208', 'step': 4726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.548532', 'step': 4726, 'epoch': 2} {'type': 'loss', 'content': 0.005187637638300657, 'timestamp': '2025-09-10 02:30:47.550419', 'step': 4727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:47.583564', 'step': 4727, 'epoch': 2} {'type': 'loss', 'content': 0.003937442786991596, 'timestamp': '2025-09-10 02:30:47.607657', 'step': 4728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.641128', 'step': 4728, 'epoch': 2} {'type': 'loss', 'content': 0.010151097550988197, 'timestamp': '2025-09-10 02:30:47.643458', 'step': 4729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.672964', 'step': 4729, 'epoch': 2} {'type': 'loss', 'content': 0.013919848017394543, 'timestamp': '2025-09-10 02:30:47.675010', 'step': 4730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.705200', 'step': 4730, 'epoch': 2} {'type': 'loss', 'content': 0.033086393028497696, 'timestamp': '2025-09-10 02:30:47.707519', 'step': 4731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.760257', 'step': 4731, 'epoch': 2} {'type': 'loss', 'content': 0.006276473868638277, 'timestamp': '2025-09-10 02:30:47.783966', 'step': 4732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.825242', 'step': 4732, 'epoch': 2} {'type': 'loss', 'content': 0.000802433758508414, 'timestamp': '2025-09-10 02:30:47.827097', 'step': 4733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.856141', 'step': 4733, 'epoch': 2} {'type': 'loss', 'content': 0.0005969117628410459, 'timestamp': '2025-09-10 02:30:47.858172', 'step': 4734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:47.888222', 'step': 4734, 'epoch': 2} {'type': 'loss', 'content': 0.013208402320742607, 'timestamp': '2025-09-10 02:30:47.890404', 'step': 4735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.922676', 'step': 4735, 'epoch': 2} {'type': 'loss', 'content': 0.002340029925107956, 'timestamp': '2025-09-10 02:30:47.946404', 'step': 4736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:47.975599', 'step': 4736, 'epoch': 2} {'type': 'loss', 'content': 0.010006987489759922, 'timestamp': '2025-09-10 02:30:47.977595', 'step': 4737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.007076', 'step': 4737, 'epoch': 2} {'type': 'loss', 'content': 0.0012558095622807741, 'timestamp': '2025-09-10 02:30:48.009670', 'step': 4738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.038706', 'step': 4738, 'epoch': 2} {'type': 'loss', 'content': 0.0228542722761631, 'timestamp': '2025-09-10 02:30:48.041043', 'step': 4739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.070160', 'step': 4739, 'epoch': 2} {'type': 'loss', 'content': 0.018659209832549095, 'timestamp': '2025-09-10 02:30:48.093650', 'step': 4740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.123026', 'step': 4740, 'epoch': 2} {'type': 'loss', 'content': 0.03863928094506264, 'timestamp': '2025-09-10 02:30:48.125075', 'step': 4741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.162263', 'step': 4741, 'epoch': 2} {'type': 'loss', 'content': 0.0017381685320287943, 'timestamp': '2025-09-10 02:30:48.164670', 'step': 4742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.204703', 'step': 4742, 'epoch': 2} {'type': 'loss', 'content': 0.0007383439806289971, 'timestamp': '2025-09-10 02:30:48.206780', 'step': 4743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.235891', 'step': 4743, 'epoch': 2} {'type': 'loss', 'content': 0.004693225957453251, 'timestamp': '2025-09-10 02:30:48.259720', 'step': 4744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.291951', 'step': 4744, 'epoch': 2} {'type': 'loss', 'content': 0.0011855376651510596, 'timestamp': '2025-09-10 02:30:48.294037', 'step': 4745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.323530', 'step': 4745, 'epoch': 2} {'type': 'loss', 'content': 0.03477257490158081, 'timestamp': '2025-09-10 02:30:48.325739', 'step': 4746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.356033', 'step': 4746, 'epoch': 2} {'type': 'loss', 'content': 0.020502671599388123, 'timestamp': '2025-09-10 02:30:48.359530', 'step': 4747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.390703', 'step': 4747, 'epoch': 2} {'type': 'loss', 'content': 0.004528583027422428, 'timestamp': '2025-09-10 02:30:48.414559', 'step': 4748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:48.445247', 'step': 4748, 'epoch': 2} {'type': 'loss', 'content': 0.028670435771346092, 'timestamp': '2025-09-10 02:30:48.447524', 'step': 4749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.477658', 'step': 4749, 'epoch': 2} {'type': 'loss', 'content': 0.021358918398618698, 'timestamp': '2025-09-10 02:30:48.480839', 'step': 4750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.513074', 'step': 4750, 'epoch': 2} {'type': 'loss', 'content': 0.0009635729365982115, 'timestamp': '2025-09-10 02:30:48.515355', 'step': 4751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:48.545419', 'step': 4751, 'epoch': 2} {'type': 'loss', 'content': 0.013524622656404972, 'timestamp': '2025-09-10 02:30:48.569661', 'step': 4752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.599023', 'step': 4752, 'epoch': 2} {'type': 'loss', 'content': 0.006929680239409208, 'timestamp': '2025-09-10 02:30:48.600950', 'step': 4753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.630016', 'step': 4753, 'epoch': 2} {'type': 'loss', 'content': 0.00010071784345200285, 'timestamp': '2025-09-10 02:30:48.632782', 'step': 4754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:48.664816', 'step': 4754, 'epoch': 2} {'type': 'loss', 'content': 0.0015575990546494722, 'timestamp': '2025-09-10 02:30:48.667070', 'step': 4755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.697112', 'step': 4755, 'epoch': 2} {'type': 'loss', 'content': 0.0017869753064587712, 'timestamp': '2025-09-10 02:30:48.720950', 'step': 4756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.750314', 'step': 4756, 'epoch': 2} {'type': 'loss', 'content': 0.0012821994023397565, 'timestamp': '2025-09-10 02:30:48.752332', 'step': 4757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.781699', 'step': 4757, 'epoch': 2} {'type': 'loss', 'content': 0.0017570492345839739, 'timestamp': '2025-09-10 02:30:48.783961', 'step': 4758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.813367', 'step': 4758, 'epoch': 2} {'type': 'loss', 'content': 0.023131389170885086, 'timestamp': '2025-09-10 02:30:48.815350', 'step': 4759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.844640', 'step': 4759, 'epoch': 2} {'type': 'loss', 'content': 0.005593287758529186, 'timestamp': '2025-09-10 02:30:48.868429', 'step': 4760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.897767', 'step': 4760, 'epoch': 2} {'type': 'loss', 'content': 0.00040549522964283824, 'timestamp': '2025-09-10 02:30:48.899881', 'step': 4761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.935826', 'step': 4761, 'epoch': 2} {'type': 'loss', 'content': 0.026496440172195435, 'timestamp': '2025-09-10 02:30:48.938266', 'step': 4762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:48.967707', 'step': 4762, 'epoch': 2} {'type': 'loss', 'content': 0.008946560323238373, 'timestamp': '2025-09-10 02:30:48.972334', 'step': 4763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.009629', 'step': 4763, 'epoch': 2} {'type': 'loss', 'content': 0.0011749848490580916, 'timestamp': '2025-09-10 02:30:49.033410', 'step': 4764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.062968', 'step': 4764, 'epoch': 2} {'type': 'loss', 'content': 0.0005551399663090706, 'timestamp': '2025-09-10 02:30:49.065691', 'step': 4765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.096877', 'step': 4765, 'epoch': 2} {'type': 'loss', 'content': 0.014968165196478367, 'timestamp': '2025-09-10 02:30:49.098825', 'step': 4766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.134078', 'step': 4766, 'epoch': 2} {'type': 'loss', 'content': 0.0019150032894685864, 'timestamp': '2025-09-10 02:30:49.136785', 'step': 4767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:49.170733', 'step': 4767, 'epoch': 2} {'type': 'loss', 'content': 0.016086271032691002, 'timestamp': '2025-09-10 02:30:49.195400', 'step': 4768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.225205', 'step': 4768, 'epoch': 2} {'type': 'loss', 'content': 0.006012638099491596, 'timestamp': '2025-09-10 02:30:49.227439', 'step': 4769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.262108', 'step': 4769, 'epoch': 2} {'type': 'loss', 'content': 0.002948788460344076, 'timestamp': '2025-09-10 02:30:49.264416', 'step': 4770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:49.296867', 'step': 4770, 'epoch': 2} {'type': 'loss', 'content': 0.003266706829890609, 'timestamp': '2025-09-10 02:30:49.299099', 'step': 4771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.328573', 'step': 4771, 'epoch': 2} {'type': 'loss', 'content': 0.03831540793180466, 'timestamp': '2025-09-10 02:30:49.352343', 'step': 4772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.382695', 'step': 4772, 'epoch': 2} {'type': 'loss', 'content': 0.004111675079911947, 'timestamp': '2025-09-10 02:30:49.384809', 'step': 4773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.414886', 'step': 4773, 'epoch': 2} {'type': 'loss', 'content': 0.006892775185406208, 'timestamp': '2025-09-10 02:30:49.419889', 'step': 4774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.455414', 'step': 4774, 'epoch': 2} {'type': 'loss', 'content': 0.0016535267932340503, 'timestamp': '2025-09-10 02:30:49.458273', 'step': 4775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.491932', 'step': 4775, 'epoch': 2} {'type': 'loss', 'content': 0.07653103768825531, 'timestamp': '2025-09-10 02:30:49.520106', 'step': 4776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.553327', 'step': 4776, 'epoch': 2} {'type': 'loss', 'content': 0.009428937919437885, 'timestamp': '2025-09-10 02:30:49.556060', 'step': 4777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.587052', 'step': 4777, 'epoch': 2} {'type': 'loss', 'content': 0.09126269817352295, 'timestamp': '2025-09-10 02:30:49.589186', 'step': 4778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.618868', 'step': 4778, 'epoch': 2} {'type': 'loss', 'content': 0.028932299464941025, 'timestamp': '2025-09-10 02:30:49.622162', 'step': 4779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:49.652698', 'step': 4779, 'epoch': 2} {'type': 'loss', 'content': 0.0021292385645210743, 'timestamp': '2025-09-10 02:30:49.676942', 'step': 4780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.707757', 'step': 4780, 'epoch': 2} {'type': 'loss', 'content': 0.05218474194407463, 'timestamp': '2025-09-10 02:30:49.710957', 'step': 4781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:49.741997', 'step': 4781, 'epoch': 2} {'type': 'loss', 'content': 0.026806095615029335, 'timestamp': '2025-09-10 02:30:49.744949', 'step': 4782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:49.779894', 'step': 4782, 'epoch': 2} {'type': 'loss', 'content': 0.008151252754032612, 'timestamp': '2025-09-10 02:30:49.781983', 'step': 4783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.810960', 'step': 4783, 'epoch': 2} {'type': 'loss', 'content': 0.03712259605526924, 'timestamp': '2025-09-10 02:30:49.834467', 'step': 4784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:49.865568', 'step': 4784, 'epoch': 2} {'type': 'loss', 'content': 0.0027715428732335567, 'timestamp': '2025-09-10 02:30:49.869335', 'step': 4785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.902393', 'step': 4785, 'epoch': 2} {'type': 'loss', 'content': 0.02292325720191002, 'timestamp': '2025-09-10 02:30:49.904897', 'step': 4786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.935530', 'step': 4786, 'epoch': 2} {'type': 'loss', 'content': 0.0007113219471648335, 'timestamp': '2025-09-10 02:30:49.940047', 'step': 4787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:49.969873', 'step': 4787, 'epoch': 2} {'type': 'loss', 'content': 0.014475691132247448, 'timestamp': '2025-09-10 02:30:49.993371', 'step': 4788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.024696', 'step': 4788, 'epoch': 2} {'type': 'loss', 'content': 0.002319976920261979, 'timestamp': '2025-09-10 02:30:50.032337', 'step': 4789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:50.062410', 'step': 4789, 'epoch': 2} {'type': 'loss', 'content': 0.032773204147815704, 'timestamp': '2025-09-10 02:30:50.065676', 'step': 4790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.098058', 'step': 4790, 'epoch': 2} {'type': 'loss', 'content': 0.0004146102874074131, 'timestamp': '2025-09-10 02:30:50.102498', 'step': 4791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.131632', 'step': 4791, 'epoch': 2} {'type': 'loss', 'content': 0.012024929746985435, 'timestamp': '2025-09-10 02:30:50.154931', 'step': 4792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.184429', 'step': 4792, 'epoch': 2} {'type': 'loss', 'content': 0.00999387912452221, 'timestamp': '2025-09-10 02:30:50.186142', 'step': 4793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.215440', 'step': 4793, 'epoch': 2} {'type': 'loss', 'content': 0.0016098637133836746, 'timestamp': '2025-09-10 02:30:50.217605', 'step': 4794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.246943', 'step': 4794, 'epoch': 2} {'type': 'loss', 'content': 0.016725609079003334, 'timestamp': '2025-09-10 02:30:50.248875', 'step': 4795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.277914', 'step': 4795, 'epoch': 2} {'type': 'loss', 'content': 0.01718818210065365, 'timestamp': '2025-09-10 02:30:50.301528', 'step': 4796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.330987', 'step': 4796, 'epoch': 2} {'type': 'loss', 'content': 0.015896115452051163, 'timestamp': '2025-09-10 02:30:50.333079', 'step': 4797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.362279', 'step': 4797, 'epoch': 2} {'type': 'loss', 'content': 0.002817119937390089, 'timestamp': '2025-09-10 02:30:50.364114', 'step': 4798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.393871', 'step': 4798, 'epoch': 2} {'type': 'loss', 'content': 0.04349520429968834, 'timestamp': '2025-09-10 02:30:50.395659', 'step': 4799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.424257', 'step': 4799, 'epoch': 2} {'type': 'loss', 'content': 0.001361390925012529, 'timestamp': '2025-09-10 02:30:50.448987', 'step': 4800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.480165', 'step': 4800, 'epoch': 2} {'type': 'loss', 'content': 0.01079056691378355, 'timestamp': '2025-09-10 02:30:50.483438', 'step': 4801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.512611', 'step': 4801, 'epoch': 2} {'type': 'loss', 'content': 0.002668142318725586, 'timestamp': '2025-09-10 02:30:50.515076', 'step': 4802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.545561', 'step': 4802, 'epoch': 2} {'type': 'loss', 'content': 0.022520411759614944, 'timestamp': '2025-09-10 02:30:50.547374', 'step': 4803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.576455', 'step': 4803, 'epoch': 2} {'type': 'loss', 'content': 0.010405519045889378, 'timestamp': '2025-09-10 02:30:50.600396', 'step': 4804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.635884', 'step': 4804, 'epoch': 2} {'type': 'loss', 'content': 0.01020369678735733, 'timestamp': '2025-09-10 02:30:50.637552', 'step': 4805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.666451', 'step': 4805, 'epoch': 2} {'type': 'loss', 'content': 0.006493265740573406, 'timestamp': '2025-09-10 02:30:50.668314', 'step': 4806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.697063', 'step': 4806, 'epoch': 2} {'type': 'loss', 'content': 0.04450357332825661, 'timestamp': '2025-09-10 02:30:50.698998', 'step': 4807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.728647', 'step': 4807, 'epoch': 2} {'type': 'loss', 'content': 0.00014824536629021168, 'timestamp': '2025-09-10 02:30:50.752699', 'step': 4808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.781969', 'step': 4808, 'epoch': 2} {'type': 'loss', 'content': 0.013092207722365856, 'timestamp': '2025-09-10 02:30:50.783988', 'step': 4809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:50.813679', 'step': 4809, 'epoch': 2} {'type': 'loss', 'content': 0.03313382714986801, 'timestamp': '2025-09-10 02:30:50.815639', 'step': 4810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.844950', 'step': 4810, 'epoch': 2} {'type': 'loss', 'content': 0.006113601382821798, 'timestamp': '2025-09-10 02:30:50.847416', 'step': 4811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.876334', 'step': 4811, 'epoch': 2} {'type': 'loss', 'content': 0.0017574252560734749, 'timestamp': '2025-09-10 02:30:50.900001', 'step': 4812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:50.930074', 'step': 4812, 'epoch': 2} {'type': 'loss', 'content': 0.00816971156746149, 'timestamp': '2025-09-10 02:30:50.932172', 'step': 4813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.960959', 'step': 4813, 'epoch': 2} {'type': 'loss', 'content': 0.000925055705010891, 'timestamp': '2025-09-10 02:30:50.962899', 'step': 4814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:50.992328', 'step': 4814, 'epoch': 2} {'type': 'loss', 'content': 0.014087872579693794, 'timestamp': '2025-09-10 02:30:50.995190', 'step': 4815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.023988', 'step': 4815, 'epoch': 2} {'type': 'loss', 'content': 0.0012622555950656533, 'timestamp': '2025-09-10 02:30:51.047401', 'step': 4816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.078791', 'step': 4816, 'epoch': 2} {'type': 'loss', 'content': 0.020634431391954422, 'timestamp': '2025-09-10 02:30:51.080583', 'step': 4817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.109384', 'step': 4817, 'epoch': 2} {'type': 'loss', 'content': 0.0009810065384954214, 'timestamp': '2025-09-10 02:30:51.111223', 'step': 4818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.141359', 'step': 4818, 'epoch': 2} {'type': 'loss', 'content': 0.0026875983458012342, 'timestamp': '2025-09-10 02:30:51.143427', 'step': 4819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.172694', 'step': 4819, 'epoch': 2} {'type': 'loss', 'content': 0.029066959396004677, 'timestamp': '2025-09-10 02:30:51.196168', 'step': 4820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.227063', 'step': 4820, 'epoch': 2} {'type': 'loss', 'content': 0.00033089984208345413, 'timestamp': '2025-09-10 02:30:51.230213', 'step': 4821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.259280', 'step': 4821, 'epoch': 2} {'type': 'loss', 'content': 0.00021650652342941612, 'timestamp': '2025-09-10 02:30:51.261415', 'step': 4822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.290713', 'step': 4822, 'epoch': 2} {'type': 'loss', 'content': 0.0036471416242420673, 'timestamp': '2025-09-10 02:30:51.292922', 'step': 4823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.321614', 'step': 4823, 'epoch': 2} {'type': 'loss', 'content': 0.0007608329760842025, 'timestamp': '2025-09-10 02:30:51.344783', 'step': 4824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.373954', 'step': 4824, 'epoch': 2} {'type': 'loss', 'content': 0.002901230240240693, 'timestamp': '2025-09-10 02:30:51.375868', 'step': 4825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.405008', 'step': 4825, 'epoch': 2} {'type': 'loss', 'content': 0.0002802798990160227, 'timestamp': '2025-09-10 02:30:51.407983', 'step': 4826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.437467', 'step': 4826, 'epoch': 2} {'type': 'loss', 'content': 0.002789972350001335, 'timestamp': '2025-09-10 02:30:51.439277', 'step': 4827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.468325', 'step': 4827, 'epoch': 2} {'type': 'loss', 'content': 0.00826730765402317, 'timestamp': '2025-09-10 02:30:51.491634', 'step': 4828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.520646', 'step': 4828, 'epoch': 2} {'type': 'loss', 'content': 0.0010153782786801457, 'timestamp': '2025-09-10 02:30:51.522743', 'step': 4829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.551519', 'step': 4829, 'epoch': 2} {'type': 'loss', 'content': 0.0037035089917480946, 'timestamp': '2025-09-10 02:30:51.553437', 'step': 4830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.582877', 'step': 4830, 'epoch': 2} {'type': 'loss', 'content': 0.0038321143947541714, 'timestamp': '2025-09-10 02:30:51.587180', 'step': 4831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.616536', 'step': 4831, 'epoch': 2} {'type': 'loss', 'content': 0.00136780203320086, 'timestamp': '2025-09-10 02:30:51.639827', 'step': 4832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:51.671772', 'step': 4832, 'epoch': 2} {'type': 'loss', 'content': 0.0012906203046441078, 'timestamp': '2025-09-10 02:30:51.673824', 'step': 4833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.703020', 'step': 4833, 'epoch': 2} {'type': 'loss', 'content': 0.006140691693872213, 'timestamp': '2025-09-10 02:30:51.704864', 'step': 4834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.733578', 'step': 4834, 'epoch': 2} {'type': 'loss', 'content': 0.01278417557477951, 'timestamp': '2025-09-10 02:30:51.735565', 'step': 4835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:51.765359', 'step': 4835, 'epoch': 2} {'type': 'loss', 'content': 0.01972927153110504, 'timestamp': '2025-09-10 02:30:51.788949', 'step': 4836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.819553', 'step': 4836, 'epoch': 2} {'type': 'loss', 'content': 0.00920105166733265, 'timestamp': '2025-09-10 02:30:51.821472', 'step': 4837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.850252', 'step': 4837, 'epoch': 2} {'type': 'loss', 'content': 0.000890783965587616, 'timestamp': '2025-09-10 02:30:51.854010', 'step': 4838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.885129', 'step': 4838, 'epoch': 2} {'type': 'loss', 'content': 0.026027875021100044, 'timestamp': '2025-09-10 02:30:51.887543', 'step': 4839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:51.916875', 'step': 4839, 'epoch': 2} {'type': 'loss', 'content': 0.0005559767014347017, 'timestamp': '2025-09-10 02:30:51.943723', 'step': 4840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:51.973426', 'step': 4840, 'epoch': 2} {'type': 'loss', 'content': 0.016504112631082535, 'timestamp': '2025-09-10 02:30:51.975972', 'step': 4841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.004996', 'step': 4841, 'epoch': 2} {'type': 'loss', 'content': 0.0019031076226383448, 'timestamp': '2025-09-10 02:30:52.007277', 'step': 4842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.036419', 'step': 4842, 'epoch': 2} {'type': 'loss', 'content': 0.004857473075389862, 'timestamp': '2025-09-10 02:30:52.039201', 'step': 4843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.068321', 'step': 4843, 'epoch': 2} {'type': 'loss', 'content': 0.00918328296393156, 'timestamp': '2025-09-10 02:30:52.091986', 'step': 4844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.120957', 'step': 4844, 'epoch': 2} {'type': 'loss', 'content': 0.006627433467656374, 'timestamp': '2025-09-10 02:30:52.122846', 'step': 4845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.151523', 'step': 4845, 'epoch': 2} {'type': 'loss', 'content': 0.033394183963537216, 'timestamp': '2025-09-10 02:30:52.153667', 'step': 4846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.182443', 'step': 4846, 'epoch': 2} {'type': 'loss', 'content': 0.03488219529390335, 'timestamp': '2025-09-10 02:30:52.184305', 'step': 4847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.212914', 'step': 4847, 'epoch': 2} {'type': 'loss', 'content': 0.0009966939687728882, 'timestamp': '2025-09-10 02:30:52.236351', 'step': 4848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.265768', 'step': 4848, 'epoch': 2} {'type': 'loss', 'content': 0.001425236347131431, 'timestamp': '2025-09-10 02:30:52.267834', 'step': 4849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.297224', 'step': 4849, 'epoch': 2} {'type': 'loss', 'content': 0.020108234137296677, 'timestamp': '2025-09-10 02:30:52.299605', 'step': 4850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.328634', 'step': 4850, 'epoch': 2} {'type': 'loss', 'content': 0.0005182851455174387, 'timestamp': '2025-09-10 02:30:52.330593', 'step': 4851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.359382', 'step': 4851, 'epoch': 2} {'type': 'loss', 'content': 0.0004465934180188924, 'timestamp': '2025-09-10 02:30:52.382974', 'step': 4852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.412078', 'step': 4852, 'epoch': 2} {'type': 'loss', 'content': 0.0028146894183009863, 'timestamp': '2025-09-10 02:30:52.417244', 'step': 4853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.447859', 'step': 4853, 'epoch': 2} {'type': 'loss', 'content': 0.005732808727771044, 'timestamp': '2025-09-10 02:30:52.450177', 'step': 4854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.478948', 'step': 4854, 'epoch': 2} {'type': 'loss', 'content': 0.014171900227665901, 'timestamp': '2025-09-10 02:30:52.480959', 'step': 4855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.510368', 'step': 4855, 'epoch': 2} {'type': 'loss', 'content': 0.04297295957803726, 'timestamp': '2025-09-10 02:30:52.534030', 'step': 4856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.562966', 'step': 4856, 'epoch': 2} {'type': 'loss', 'content': 0.005833758972585201, 'timestamp': '2025-09-10 02:30:52.564904', 'step': 4857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.594240', 'step': 4857, 'epoch': 2} {'type': 'loss', 'content': 0.002182360040023923, 'timestamp': '2025-09-10 02:30:52.600049', 'step': 4858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.631390', 'step': 4858, 'epoch': 2} {'type': 'loss', 'content': 0.052217476069927216, 'timestamp': '2025-09-10 02:30:52.633824', 'step': 4859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.663977', 'step': 4859, 'epoch': 2} {'type': 'loss', 'content': 0.0024717673659324646, 'timestamp': '2025-09-10 02:30:52.687891', 'step': 4860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.716384', 'step': 4860, 'epoch': 2} {'type': 'loss', 'content': 0.01088649220764637, 'timestamp': '2025-09-10 02:30:52.718366', 'step': 4861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.747372', 'step': 4861, 'epoch': 2} {'type': 'loss', 'content': 0.004765079822391272, 'timestamp': '2025-09-10 02:30:52.749796', 'step': 4862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.778934', 'step': 4862, 'epoch': 2} {'type': 'loss', 'content': 0.013388590887188911, 'timestamp': '2025-09-10 02:30:52.780659', 'step': 4863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:52.809592', 'step': 4863, 'epoch': 2} {'type': 'loss', 'content': 0.04290319234132767, 'timestamp': '2025-09-10 02:30:52.833120', 'step': 4864, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:30:54.729278', 'step': 4864, 'epoch': 2} {'type': 'pplx', 'content': 2178451.329655371, 'timestamp': '2025-09-10 02:30:54.731373', 'step': 4864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.759681', 'step': 4864, 'epoch': 2} {'type': 'loss', 'content': 0.009877247735857964, 'timestamp': '2025-09-10 02:30:54.762156', 'step': 4865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.791173', 'step': 4865, 'epoch': 2} {'type': 'loss', 'content': 0.015933571383357048, 'timestamp': '2025-09-10 02:30:54.793230', 'step': 4866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.822657', 'step': 4866, 'epoch': 2} {'type': 'loss', 'content': 0.045118559151887894, 'timestamp': '2025-09-10 02:30:54.824601', 'step': 4867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.853373', 'step': 4867, 'epoch': 2} {'type': 'loss', 'content': 0.01064248289912939, 'timestamp': '2025-09-10 02:30:54.876950', 'step': 4868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.906466', 'step': 4868, 'epoch': 2} {'type': 'loss', 'content': 0.002190890721976757, 'timestamp': '2025-09-10 02:30:54.910045', 'step': 4869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.940989', 'step': 4869, 'epoch': 2} {'type': 'loss', 'content': 0.004141589161008596, 'timestamp': '2025-09-10 02:30:54.943081', 'step': 4870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:54.973037', 'step': 4870, 'epoch': 2} {'type': 'loss', 'content': 0.002087392844259739, 'timestamp': '2025-09-10 02:30:54.975670', 'step': 4871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 80], 'flops': 593517404912}, 'timestamp': '2025-09-10 02:30:55.005230', 'step': 4871, 'epoch': 2} {'type': 'loss', 'content': 2.8176156774861738e-05, 'timestamp': '2025-09-10 02:30:55.028664', 'step': 4872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.058654', 'step': 4872, 'epoch': 3} {'type': 'loss', 'content': 0.0014280019095167518, 'timestamp': '2025-09-10 02:30:55.060873', 'step': 4873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.090133', 'step': 4873, 'epoch': 3} {'type': 'loss', 'content': 0.001310416730120778, 'timestamp': '2025-09-10 02:30:55.092153', 'step': 4874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.120821', 'step': 4874, 'epoch': 3} {'type': 'loss', 'content': 0.012540838681161404, 'timestamp': '2025-09-10 02:30:55.123011', 'step': 4875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.151824', 'step': 4875, 'epoch': 3} {'type': 'loss', 'content': 0.003385176882147789, 'timestamp': '2025-09-10 02:30:55.175461', 'step': 4876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.204720', 'step': 4876, 'epoch': 3} {'type': 'loss', 'content': 0.017024511471390724, 'timestamp': '2025-09-10 02:30:55.206968', 'step': 4877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.235625', 'step': 4877, 'epoch': 3} {'type': 'loss', 'content': 0.0008043819689191878, 'timestamp': '2025-09-10 02:30:55.237915', 'step': 4878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.267047', 'step': 4878, 'epoch': 3} {'type': 'loss', 'content': 0.0031665521673858166, 'timestamp': '2025-09-10 02:30:55.269039', 'step': 4879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.297866', 'step': 4879, 'epoch': 3} {'type': 'loss', 'content': 0.009495946578681469, 'timestamp': '2025-09-10 02:30:55.321615', 'step': 4880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:55.350966', 'step': 4880, 'epoch': 3} {'type': 'loss', 'content': 0.005551033653318882, 'timestamp': '2025-09-10 02:30:55.352782', 'step': 4881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.381555', 'step': 4881, 'epoch': 3} {'type': 'loss', 'content': 0.010024656541645527, 'timestamp': '2025-09-10 02:30:55.383607', 'step': 4882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.412549', 'step': 4882, 'epoch': 3} {'type': 'loss', 'content': 0.02356908656656742, 'timestamp': '2025-09-10 02:30:55.414568', 'step': 4883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.443563', 'step': 4883, 'epoch': 3} {'type': 'loss', 'content': 0.005587874446064234, 'timestamp': '2025-09-10 02:30:55.467302', 'step': 4884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:55.496453', 'step': 4884, 'epoch': 3} {'type': 'loss', 'content': 0.005373778752982616, 'timestamp': '2025-09-10 02:30:55.498428', 'step': 4885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.527886', 'step': 4885, 'epoch': 3} {'type': 'loss', 'content': 0.008848587051033974, 'timestamp': '2025-09-10 02:30:55.529851', 'step': 4886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.558828', 'step': 4886, 'epoch': 3} {'type': 'loss', 'content': 0.005631915759295225, 'timestamp': '2025-09-10 02:30:55.560887', 'step': 4887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.589649', 'step': 4887, 'epoch': 3} {'type': 'loss', 'content': 0.008946587331593037, 'timestamp': '2025-09-10 02:30:55.613157', 'step': 4888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.642541', 'step': 4888, 'epoch': 3} {'type': 'loss', 'content': 0.00208521937020123, 'timestamp': '2025-09-10 02:30:55.644607', 'step': 4889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:55.673673', 'step': 4889, 'epoch': 3} {'type': 'loss', 'content': 0.0030278817284852266, 'timestamp': '2025-09-10 02:30:55.675501', 'step': 4890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.704385', 'step': 4890, 'epoch': 3} {'type': 'loss', 'content': 0.004253068007528782, 'timestamp': '2025-09-10 02:30:55.706508', 'step': 4891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.735543', 'step': 4891, 'epoch': 3} {'type': 'loss', 'content': 0.0020537772215902805, 'timestamp': '2025-09-10 02:30:55.758995', 'step': 4892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:55.788166', 'step': 4892, 'epoch': 3} {'type': 'loss', 'content': 0.0005098031833767891, 'timestamp': '2025-09-10 02:30:55.790448', 'step': 4893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.819035', 'step': 4893, 'epoch': 3} {'type': 'loss', 'content': 0.01048461813479662, 'timestamp': '2025-09-10 02:30:55.820952', 'step': 4894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.849814', 'step': 4894, 'epoch': 3} {'type': 'loss', 'content': 0.002517236163839698, 'timestamp': '2025-09-10 02:30:55.851904', 'step': 4895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.880610', 'step': 4895, 'epoch': 3} {'type': 'loss', 'content': 0.005761867854744196, 'timestamp': '2025-09-10 02:30:55.903958', 'step': 4896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.932854', 'step': 4896, 'epoch': 3} {'type': 'loss', 'content': 0.0038676075637340546, 'timestamp': '2025-09-10 02:30:55.934891', 'step': 4897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:55.963160', 'step': 4897, 'epoch': 3} {'type': 'loss', 'content': 0.0013028824469074607, 'timestamp': '2025-09-10 02:30:55.965037', 'step': 4898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:55.993837', 'step': 4898, 'epoch': 3} {'type': 'loss', 'content': 0.0014792155707255006, 'timestamp': '2025-09-10 02:30:55.995574', 'step': 4899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.024698', 'step': 4899, 'epoch': 3} {'type': 'loss', 'content': 0.00812798272818327, 'timestamp': '2025-09-10 02:30:56.048091', 'step': 4900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.077423', 'step': 4900, 'epoch': 3} {'type': 'loss', 'content': 9.349980246042833e-05, 'timestamp': '2025-09-10 02:30:56.079610', 'step': 4901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.108433', 'step': 4901, 'epoch': 3} {'type': 'loss', 'content': 0.0007435997831635177, 'timestamp': '2025-09-10 02:30:56.110405', 'step': 4902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.139003', 'step': 4902, 'epoch': 3} {'type': 'loss', 'content': 0.007078954018652439, 'timestamp': '2025-09-10 02:30:56.141088', 'step': 4903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.169951', 'step': 4903, 'epoch': 3} {'type': 'loss', 'content': 0.0019112270092591643, 'timestamp': '2025-09-10 02:30:56.193664', 'step': 4904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.223792', 'step': 4904, 'epoch': 3} {'type': 'loss', 'content': 0.0007984461262822151, 'timestamp': '2025-09-10 02:30:56.225434', 'step': 4905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.254567', 'step': 4905, 'epoch': 3} {'type': 'loss', 'content': 0.005893030669540167, 'timestamp': '2025-09-10 02:30:56.256582', 'step': 4906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.285719', 'step': 4906, 'epoch': 3} {'type': 'loss', 'content': 0.002062671585008502, 'timestamp': '2025-09-10 02:30:56.288020', 'step': 4907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.317047', 'step': 4907, 'epoch': 3} {'type': 'loss', 'content': 0.00011138491390738636, 'timestamp': '2025-09-10 02:30:56.340758', 'step': 4908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.370609', 'step': 4908, 'epoch': 3} {'type': 'loss', 'content': 0.0002839634835254401, 'timestamp': '2025-09-10 02:30:56.372465', 'step': 4909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.401909', 'step': 4909, 'epoch': 3} {'type': 'loss', 'content': 0.03709616884589195, 'timestamp': '2025-09-10 02:30:56.403896', 'step': 4910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.433071', 'step': 4910, 'epoch': 3} {'type': 'loss', 'content': 0.0009739446686580777, 'timestamp': '2025-09-10 02:30:56.435179', 'step': 4911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.464101', 'step': 4911, 'epoch': 3} {'type': 'loss', 'content': 0.004271974321454763, 'timestamp': '2025-09-10 02:30:56.487694', 'step': 4912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.516761', 'step': 4912, 'epoch': 3} {'type': 'loss', 'content': 0.0005912575870752335, 'timestamp': '2025-09-10 02:30:56.518939', 'step': 4913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:56.547837', 'step': 4913, 'epoch': 3} {'type': 'loss', 'content': 0.0004359797458164394, 'timestamp': '2025-09-10 02:30:56.549922', 'step': 4914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.579060', 'step': 4914, 'epoch': 3} {'type': 'loss', 'content': 0.020385054871439934, 'timestamp': '2025-09-10 02:30:56.580913', 'step': 4915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.609976', 'step': 4915, 'epoch': 3} {'type': 'loss', 'content': 0.01246044784784317, 'timestamp': '2025-09-10 02:30:56.633692', 'step': 4916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.663254', 'step': 4916, 'epoch': 3} {'type': 'loss', 'content': 0.0003972502890974283, 'timestamp': '2025-09-10 02:30:56.665203', 'step': 4917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.694697', 'step': 4917, 'epoch': 3} {'type': 'loss', 'content': 0.0024749203585088253, 'timestamp': '2025-09-10 02:30:56.696842', 'step': 4918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.725998', 'step': 4918, 'epoch': 3} {'type': 'loss', 'content': 0.00011896999785676599, 'timestamp': '2025-09-10 02:30:56.727923', 'step': 4919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.756735', 'step': 4919, 'epoch': 3} {'type': 'loss', 'content': 0.03590719774365425, 'timestamp': '2025-09-10 02:30:56.780360', 'step': 4920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.809418', 'step': 4920, 'epoch': 3} {'type': 'loss', 'content': 0.0033199347089976072, 'timestamp': '2025-09-10 02:30:56.811523', 'step': 4921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.841106', 'step': 4921, 'epoch': 3} {'type': 'loss', 'content': 0.0011541909771040082, 'timestamp': '2025-09-10 02:30:56.843436', 'step': 4922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:56.873352', 'step': 4922, 'epoch': 3} {'type': 'loss', 'content': 0.0016362580936402082, 'timestamp': '2025-09-10 02:30:56.875605', 'step': 4923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:56.905080', 'step': 4923, 'epoch': 3} {'type': 'loss', 'content': 0.0036181563045829535, 'timestamp': '2025-09-10 02:30:56.928572', 'step': 4924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:56.957967', 'step': 4924, 'epoch': 3} {'type': 'loss', 'content': 0.05081191286444664, 'timestamp': '2025-09-10 02:30:56.959940', 'step': 4925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:56.988879', 'step': 4925, 'epoch': 3} {'type': 'loss', 'content': 0.0005761417560279369, 'timestamp': '2025-09-10 02:30:56.990870', 'step': 4926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.019584', 'step': 4926, 'epoch': 3} {'type': 'loss', 'content': 0.0006865057512186468, 'timestamp': '2025-09-10 02:30:57.021584', 'step': 4927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:57.050505', 'step': 4927, 'epoch': 3} {'type': 'loss', 'content': 0.004511348903179169, 'timestamp': '2025-09-10 02:30:57.074331', 'step': 4928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.103454', 'step': 4928, 'epoch': 3} {'type': 'loss', 'content': 0.0009536277502775192, 'timestamp': '2025-09-10 02:30:57.105672', 'step': 4929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.135133', 'step': 4929, 'epoch': 3} {'type': 'loss', 'content': 0.00153729144949466, 'timestamp': '2025-09-10 02:30:57.137441', 'step': 4930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.167292', 'step': 4930, 'epoch': 3} {'type': 'loss', 'content': 0.0001375428691972047, 'timestamp': '2025-09-10 02:30:57.169207', 'step': 4931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.197808', 'step': 4931, 'epoch': 3} {'type': 'loss', 'content': 0.009686694480478764, 'timestamp': '2025-09-10 02:30:57.221513', 'step': 4932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.250724', 'step': 4932, 'epoch': 3} {'type': 'loss', 'content': 0.00025545855169184506, 'timestamp': '2025-09-10 02:30:57.252677', 'step': 4933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.281325', 'step': 4933, 'epoch': 3} {'type': 'loss', 'content': 0.02290979214012623, 'timestamp': '2025-09-10 02:30:57.283327', 'step': 4934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.312391', 'step': 4934, 'epoch': 3} {'type': 'loss', 'content': 0.0005374342435970902, 'timestamp': '2025-09-10 02:30:57.314372', 'step': 4935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.343458', 'step': 4935, 'epoch': 3} {'type': 'loss', 'content': 0.00914563238620758, 'timestamp': '2025-09-10 02:30:57.367320', 'step': 4936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.396425', 'step': 4936, 'epoch': 3} {'type': 'loss', 'content': 0.008263180032372475, 'timestamp': '2025-09-10 02:30:57.398370', 'step': 4937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.427491', 'step': 4937, 'epoch': 3} {'type': 'loss', 'content': 0.0008246535435318947, 'timestamp': '2025-09-10 02:30:57.429573', 'step': 4938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.460520', 'step': 4938, 'epoch': 3} {'type': 'loss', 'content': 0.004253962077200413, 'timestamp': '2025-09-10 02:30:57.462435', 'step': 4939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.492606', 'step': 4939, 'epoch': 3} {'type': 'loss', 'content': 0.0006401669816114008, 'timestamp': '2025-09-10 02:30:57.516326', 'step': 4940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:57.545816', 'step': 4940, 'epoch': 3} {'type': 'loss', 'content': 0.05252986028790474, 'timestamp': '2025-09-10 02:30:57.547685', 'step': 4941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.576910', 'step': 4941, 'epoch': 3} {'type': 'loss', 'content': 0.0003547283704392612, 'timestamp': '2025-09-10 02:30:57.579043', 'step': 4942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.608108', 'step': 4942, 'epoch': 3} {'type': 'loss', 'content': 0.00021801998082082719, 'timestamp': '2025-09-10 02:30:57.610139', 'step': 4943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.639729', 'step': 4943, 'epoch': 3} {'type': 'loss', 'content': 0.006962130311876535, 'timestamp': '2025-09-10 02:30:57.663675', 'step': 4944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.693492', 'step': 4944, 'epoch': 3} {'type': 'loss', 'content': 0.011197717860341072, 'timestamp': '2025-09-10 02:30:57.695777', 'step': 4945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.724854', 'step': 4945, 'epoch': 3} {'type': 'loss', 'content': 0.005284208804368973, 'timestamp': '2025-09-10 02:30:57.726966', 'step': 4946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.760765', 'step': 4946, 'epoch': 3} {'type': 'loss', 'content': 0.05076735466718674, 'timestamp': '2025-09-10 02:30:57.762679', 'step': 4947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.791881', 'step': 4947, 'epoch': 3} {'type': 'loss', 'content': 0.0007167586009018123, 'timestamp': '2025-09-10 02:30:57.815453', 'step': 4948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.847529', 'step': 4948, 'epoch': 3} {'type': 'loss', 'content': 0.000604218163061887, 'timestamp': '2025-09-10 02:30:57.850359', 'step': 4949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.884073', 'step': 4949, 'epoch': 3} {'type': 'loss', 'content': 0.011483556590974331, 'timestamp': '2025-09-10 02:30:57.886684', 'step': 4950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:57.921376', 'step': 4950, 'epoch': 3} {'type': 'loss', 'content': 0.01305474154651165, 'timestamp': '2025-09-10 02:30:57.926067', 'step': 4951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:57.961102', 'step': 4951, 'epoch': 3} {'type': 'loss', 'content': 0.00013778348511550575, 'timestamp': '2025-09-10 02:30:57.984516', 'step': 4952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:30:58.013208', 'step': 4952, 'epoch': 3} {'type': 'loss', 'content': 0.00018441618885844946, 'timestamp': '2025-09-10 02:30:58.018947', 'step': 4953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.047736', 'step': 4953, 'epoch': 3} {'type': 'loss', 'content': 0.05111263319849968, 'timestamp': '2025-09-10 02:30:58.049585', 'step': 4954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.078583', 'step': 4954, 'epoch': 3} {'type': 'loss', 'content': 0.0012449711794033647, 'timestamp': '2025-09-10 02:30:58.080957', 'step': 4955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.110094', 'step': 4955, 'epoch': 3} {'type': 'loss', 'content': 0.009258122183382511, 'timestamp': '2025-09-10 02:30:58.133899', 'step': 4956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.163735', 'step': 4956, 'epoch': 3} {'type': 'loss', 'content': 0.002155735855922103, 'timestamp': '2025-09-10 02:30:58.166012', 'step': 4957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.195004', 'step': 4957, 'epoch': 3} {'type': 'loss', 'content': 0.0022412240505218506, 'timestamp': '2025-09-10 02:30:58.196950', 'step': 4958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.225486', 'step': 4958, 'epoch': 3} {'type': 'loss', 'content': 0.00033534460817463696, 'timestamp': '2025-09-10 02:30:58.227275', 'step': 4959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.256020', 'step': 4959, 'epoch': 3} {'type': 'loss', 'content': 0.0064600491896271706, 'timestamp': '2025-09-10 02:30:58.280119', 'step': 4960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.310184', 'step': 4960, 'epoch': 3} {'type': 'loss', 'content': 0.0005327347898855805, 'timestamp': '2025-09-10 02:30:58.312268', 'step': 4961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:58.341323', 'step': 4961, 'epoch': 3} {'type': 'loss', 'content': 0.0001961165980901569, 'timestamp': '2025-09-10 02:30:58.343444', 'step': 4962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:58.372501', 'step': 4962, 'epoch': 3} {'type': 'loss', 'content': 0.0003622942604124546, 'timestamp': '2025-09-10 02:30:58.374462', 'step': 4963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.403498', 'step': 4963, 'epoch': 3} {'type': 'loss', 'content': 0.0043032411485910416, 'timestamp': '2025-09-10 02:30:58.427006', 'step': 4964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.456323', 'step': 4964, 'epoch': 3} {'type': 'loss', 'content': 0.004201345145702362, 'timestamp': '2025-09-10 02:30:58.458489', 'step': 4965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.487343', 'step': 4965, 'epoch': 3} {'type': 'loss', 'content': 0.012078885920345783, 'timestamp': '2025-09-10 02:30:58.489343', 'step': 4966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.518236', 'step': 4966, 'epoch': 3} {'type': 'loss', 'content': 0.037341173738241196, 'timestamp': '2025-09-10 02:30:58.520124', 'step': 4967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.549013', 'step': 4967, 'epoch': 3} {'type': 'loss', 'content': 0.001226257300004363, 'timestamp': '2025-09-10 02:30:58.572606', 'step': 4968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:58.602390', 'step': 4968, 'epoch': 3} {'type': 'loss', 'content': 0.0004969441215507686, 'timestamp': '2025-09-10 02:30:58.604432', 'step': 4969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.633771', 'step': 4969, 'epoch': 3} {'type': 'loss', 'content': 8.056405931711197e-05, 'timestamp': '2025-09-10 02:30:58.635845', 'step': 4970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.665133', 'step': 4970, 'epoch': 3} {'type': 'loss', 'content': 0.0022159700747579336, 'timestamp': '2025-09-10 02:30:58.667187', 'step': 4971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:58.695843', 'step': 4971, 'epoch': 3} {'type': 'loss', 'content': 0.000437339476775378, 'timestamp': '2025-09-10 02:30:58.719632', 'step': 4972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.751437', 'step': 4972, 'epoch': 3} {'type': 'loss', 'content': 0.0015226582763716578, 'timestamp': '2025-09-10 02:30:58.753557', 'step': 4973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.783053', 'step': 4973, 'epoch': 3} {'type': 'loss', 'content': 0.009864314459264278, 'timestamp': '2025-09-10 02:30:58.785177', 'step': 4974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:30:58.814421', 'step': 4974, 'epoch': 3} {'type': 'loss', 'content': 0.014232958666980267, 'timestamp': '2025-09-10 02:30:58.816302', 'step': 4975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.845790', 'step': 4975, 'epoch': 3} {'type': 'loss', 'content': 0.001106222509406507, 'timestamp': '2025-09-10 02:30:58.869502', 'step': 4976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:58.899119', 'step': 4976, 'epoch': 3} {'type': 'loss', 'content': 0.00033264182275161147, 'timestamp': '2025-09-10 02:30:58.901275', 'step': 4977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.931059', 'step': 4977, 'epoch': 3} {'type': 'loss', 'content': 0.00046894553815945983, 'timestamp': '2025-09-10 02:30:58.933069', 'step': 4978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.962008', 'step': 4978, 'epoch': 3} {'type': 'loss', 'content': 0.001208845293149352, 'timestamp': '2025-09-10 02:30:58.964084', 'step': 4979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:58.993135', 'step': 4979, 'epoch': 3} {'type': 'loss', 'content': 0.006808232516050339, 'timestamp': '2025-09-10 02:30:59.017447', 'step': 4980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.046549', 'step': 4980, 'epoch': 3} {'type': 'loss', 'content': 0.00014657301653642207, 'timestamp': '2025-09-10 02:30:59.048548', 'step': 4981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.077633', 'step': 4981, 'epoch': 3} {'type': 'loss', 'content': 0.07903604209423065, 'timestamp': '2025-09-10 02:30:59.079893', 'step': 4982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:59.108814', 'step': 4982, 'epoch': 3} {'type': 'loss', 'content': 0.031232083216309547, 'timestamp': '2025-09-10 02:30:59.110677', 'step': 4983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.139020', 'step': 4983, 'epoch': 3} {'type': 'loss', 'content': 0.0011663326295092702, 'timestamp': '2025-09-10 02:30:59.162732', 'step': 4984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.191585', 'step': 4984, 'epoch': 3} {'type': 'loss', 'content': 0.016943303868174553, 'timestamp': '2025-09-10 02:30:59.193540', 'step': 4985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:59.222314', 'step': 4985, 'epoch': 3} {'type': 'loss', 'content': 0.0015794998034834862, 'timestamp': '2025-09-10 02:30:59.224229', 'step': 4986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.253125', 'step': 4986, 'epoch': 3} {'type': 'loss', 'content': 0.03883078321814537, 'timestamp': '2025-09-10 02:30:59.255095', 'step': 4987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.283483', 'step': 4987, 'epoch': 3} {'type': 'loss', 'content': 0.00023402106307912618, 'timestamp': '2025-09-10 02:30:59.306726', 'step': 4988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:59.335306', 'step': 4988, 'epoch': 3} {'type': 'loss', 'content': 0.005712156184017658, 'timestamp': '2025-09-10 02:30:59.337494', 'step': 4989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.367008', 'step': 4989, 'epoch': 3} {'type': 'loss', 'content': 0.000885231769643724, 'timestamp': '2025-09-10 02:30:59.368900', 'step': 4990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.397889', 'step': 4990, 'epoch': 3} {'type': 'loss', 'content': 0.00021244284289423376, 'timestamp': '2025-09-10 02:30:59.399841', 'step': 4991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:30:59.428952', 'step': 4991, 'epoch': 3} {'type': 'loss', 'content': 0.009885640814900398, 'timestamp': '2025-09-10 02:30:59.452533', 'step': 4992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.481548', 'step': 4992, 'epoch': 3} {'type': 'loss', 'content': 0.00021031063806731254, 'timestamp': '2025-09-10 02:30:59.484064', 'step': 4993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.512613', 'step': 4993, 'epoch': 3} {'type': 'loss', 'content': 0.003944438882172108, 'timestamp': '2025-09-10 02:30:59.514656', 'step': 4994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.543447', 'step': 4994, 'epoch': 3} {'type': 'loss', 'content': 0.0002700241457205266, 'timestamp': '2025-09-10 02:30:59.545555', 'step': 4995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.574274', 'step': 4995, 'epoch': 3} {'type': 'loss', 'content': 0.0028411434032022953, 'timestamp': '2025-09-10 02:30:59.597723', 'step': 4996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.626826', 'step': 4996, 'epoch': 3} {'type': 'loss', 'content': 0.0009857058757916093, 'timestamp': '2025-09-10 02:30:59.630428', 'step': 4997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.661124', 'step': 4997, 'epoch': 3} {'type': 'loss', 'content': 0.0003661169612314552, 'timestamp': '2025-09-10 02:30:59.663116', 'step': 4998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.691888', 'step': 4998, 'epoch': 3} {'type': 'loss', 'content': 0.0011222055181860924, 'timestamp': '2025-09-10 02:30:59.694013', 'step': 4999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:30:59.722885', 'step': 4999, 'epoch': 3} {'type': 'loss', 'content': 0.023360345512628555, 'timestamp': '2025-09-10 02:30:59.747175', 'step': 5000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-10 02:31:04.185902', 'step': 5000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.225251', 'step': 5000, 'epoch': 3} {'type': 'loss', 'content': 0.004800345283001661, 'timestamp': '2025-09-10 02:31:04.227303', 'step': 5001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.259228', 'step': 5001, 'epoch': 3} {'type': 'loss', 'content': 0.00048084885929711163, 'timestamp': '2025-09-10 02:31:04.261214', 'step': 5002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:04.290803', 'step': 5002, 'epoch': 3} {'type': 'loss', 'content': 0.007763330824673176, 'timestamp': '2025-09-10 02:31:04.292958', 'step': 5003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.322053', 'step': 5003, 'epoch': 3} {'type': 'loss', 'content': 0.01378608588129282, 'timestamp': '2025-09-10 02:31:04.346130', 'step': 5004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.374942', 'step': 5004, 'epoch': 3} {'type': 'loss', 'content': 0.0037001348100602627, 'timestamp': '2025-09-10 02:31:04.378119', 'step': 5005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.408073', 'step': 5005, 'epoch': 3} {'type': 'loss', 'content': 0.00013897249300498515, 'timestamp': '2025-09-10 02:31:04.410564', 'step': 5006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.440401', 'step': 5006, 'epoch': 3} {'type': 'loss', 'content': 0.0008228147053159773, 'timestamp': '2025-09-10 02:31:04.442584', 'step': 5007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.473912', 'step': 5007, 'epoch': 3} {'type': 'loss', 'content': 0.027694018557667732, 'timestamp': '2025-09-10 02:31:04.501495', 'step': 5008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:04.537724', 'step': 5008, 'epoch': 3} {'type': 'loss', 'content': 0.0046686953864991665, 'timestamp': '2025-09-10 02:31:04.539612', 'step': 5009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.568994', 'step': 5009, 'epoch': 3} {'type': 'loss', 'content': 0.00345020298846066, 'timestamp': '2025-09-10 02:31:04.571508', 'step': 5010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:04.601580', 'step': 5010, 'epoch': 3} {'type': 'loss', 'content': 0.00317819113843143, 'timestamp': '2025-09-10 02:31:04.603608', 'step': 5011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.636356', 'step': 5011, 'epoch': 3} {'type': 'loss', 'content': 0.0029954214114695787, 'timestamp': '2025-09-10 02:31:04.660122', 'step': 5012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.689605', 'step': 5012, 'epoch': 3} {'type': 'loss', 'content': 0.003420087741687894, 'timestamp': '2025-09-10 02:31:04.692847', 'step': 5013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.721329', 'step': 5013, 'epoch': 3} {'type': 'loss', 'content': 0.002578622894361615, 'timestamp': '2025-09-10 02:31:04.723487', 'step': 5014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.756859', 'step': 5014, 'epoch': 3} {'type': 'loss', 'content': 0.00044571320177055895, 'timestamp': '2025-09-10 02:31:04.763216', 'step': 5015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:04.794631', 'step': 5015, 'epoch': 3} {'type': 'loss', 'content': 0.013255941681563854, 'timestamp': '2025-09-10 02:31:04.818710', 'step': 5016, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:31:06.850304', 'step': 5016, 'epoch': 3} {'type': 'pplx', 'content': 2010640.6963496492, 'timestamp': '2025-09-10 02:31:06.852476', 'step': 5016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:06.880570', 'step': 5016, 'epoch': 3} {'type': 'loss', 'content': 0.014487654902040958, 'timestamp': '2025-09-10 02:31:06.883849', 'step': 5017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:06.914895', 'step': 5017, 'epoch': 3} {'type': 'loss', 'content': 0.0020964504219591618, 'timestamp': '2025-09-10 02:31:06.916938', 'step': 5018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:06.945872', 'step': 5018, 'epoch': 3} {'type': 'loss', 'content': 0.0005403442191891372, 'timestamp': '2025-09-10 02:31:06.947929', 'step': 5019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:06.976977', 'step': 5019, 'epoch': 3} {'type': 'loss', 'content': 0.00035207762266509235, 'timestamp': '2025-09-10 02:31:07.000788', 'step': 5020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.031718', 'step': 5020, 'epoch': 3} {'type': 'loss', 'content': 0.015816517174243927, 'timestamp': '2025-09-10 02:31:07.033653', 'step': 5021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.062374', 'step': 5021, 'epoch': 3} {'type': 'loss', 'content': 0.0006769512547180057, 'timestamp': '2025-09-10 02:31:07.064332', 'step': 5022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.094912', 'step': 5022, 'epoch': 3} {'type': 'loss', 'content': 0.0006116781732998788, 'timestamp': '2025-09-10 02:31:07.099658', 'step': 5023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.130903', 'step': 5023, 'epoch': 3} {'type': 'loss', 'content': 0.001322591444477439, 'timestamp': '2025-09-10 02:31:07.154227', 'step': 5024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.183081', 'step': 5024, 'epoch': 3} {'type': 'loss', 'content': 0.011232644319534302, 'timestamp': '2025-09-10 02:31:07.185022', 'step': 5025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.213389', 'step': 5025, 'epoch': 3} {'type': 'loss', 'content': 0.007375881541520357, 'timestamp': '2025-09-10 02:31:07.215298', 'step': 5026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:07.244311', 'step': 5026, 'epoch': 3} {'type': 'loss', 'content': 9.760600369190797e-05, 'timestamp': '2025-09-10 02:31:07.246122', 'step': 5027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.274692', 'step': 5027, 'epoch': 3} {'type': 'loss', 'content': 0.006258365698158741, 'timestamp': '2025-09-10 02:31:07.297937', 'step': 5028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:07.330179', 'step': 5028, 'epoch': 3} {'type': 'loss', 'content': 0.000321950443321839, 'timestamp': '2025-09-10 02:31:07.337302', 'step': 5029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:07.371440', 'step': 5029, 'epoch': 3} {'type': 'loss', 'content': 0.00030036564567126334, 'timestamp': '2025-09-10 02:31:07.373461', 'step': 5030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.404325', 'step': 5030, 'epoch': 3} {'type': 'loss', 'content': 0.00012984655040781945, 'timestamp': '2025-09-10 02:31:07.408412', 'step': 5031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.442315', 'step': 5031, 'epoch': 3} {'type': 'loss', 'content': 0.0006678973441012204, 'timestamp': '2025-09-10 02:31:07.465816', 'step': 5032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.499085', 'step': 5032, 'epoch': 3} {'type': 'loss', 'content': 0.00010896347521338612, 'timestamp': '2025-09-10 02:31:07.500935', 'step': 5033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.532984', 'step': 5033, 'epoch': 3} {'type': 'loss', 'content': 0.0009329610620625317, 'timestamp': '2025-09-10 02:31:07.534984', 'step': 5034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.564448', 'step': 5034, 'epoch': 3} {'type': 'loss', 'content': 0.000349718815414235, 'timestamp': '2025-09-10 02:31:07.566494', 'step': 5035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.595493', 'step': 5035, 'epoch': 3} {'type': 'loss', 'content': 0.000719129282515496, 'timestamp': '2025-09-10 02:31:07.619043', 'step': 5036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:07.648144', 'step': 5036, 'epoch': 3} {'type': 'loss', 'content': 0.00318794883787632, 'timestamp': '2025-09-10 02:31:07.650266', 'step': 5037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:07.682288', 'step': 5037, 'epoch': 3} {'type': 'loss', 'content': 0.004861722234636545, 'timestamp': '2025-09-10 02:31:07.683836', 'step': 5038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.712365', 'step': 5038, 'epoch': 3} {'type': 'loss', 'content': 0.00029474907205440104, 'timestamp': '2025-09-10 02:31:07.714293', 'step': 5039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.743357', 'step': 5039, 'epoch': 3} {'type': 'loss', 'content': 0.0002755603345576674, 'timestamp': '2025-09-10 02:31:07.766662', 'step': 5040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:07.796324', 'step': 5040, 'epoch': 3} {'type': 'loss', 'content': 0.011947492137551308, 'timestamp': '2025-09-10 02:31:07.798201', 'step': 5041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.828617', 'step': 5041, 'epoch': 3} {'type': 'loss', 'content': 0.0006149975233711302, 'timestamp': '2025-09-10 02:31:07.838691', 'step': 5042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.868109', 'step': 5042, 'epoch': 3} {'type': 'loss', 'content': 0.00019832760153803974, 'timestamp': '2025-09-10 02:31:07.870127', 'step': 5043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:07.900565', 'step': 5043, 'epoch': 3} {'type': 'loss', 'content': 0.009482159279286861, 'timestamp': '2025-09-10 02:31:07.925916', 'step': 5044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.958591', 'step': 5044, 'epoch': 3} {'type': 'loss', 'content': 0.00039480425766669214, 'timestamp': '2025-09-10 02:31:07.960939', 'step': 5045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:07.990159', 'step': 5045, 'epoch': 3} {'type': 'loss', 'content': 0.0005279025062918663, 'timestamp': '2025-09-10 02:31:07.994009', 'step': 5046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.025986', 'step': 5046, 'epoch': 3} {'type': 'loss', 'content': 0.00013250982738099992, 'timestamp': '2025-09-10 02:31:08.027773', 'step': 5047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.056314', 'step': 5047, 'epoch': 3} {'type': 'loss', 'content': 0.000527249532751739, 'timestamp': '2025-09-10 02:31:08.079740', 'step': 5048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.108209', 'step': 5048, 'epoch': 3} {'type': 'loss', 'content': 0.00014435425691772252, 'timestamp': '2025-09-10 02:31:08.110256', 'step': 5049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.138569', 'step': 5049, 'epoch': 3} {'type': 'loss', 'content': 0.0002105718303937465, 'timestamp': '2025-09-10 02:31:08.140147', 'step': 5050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.169702', 'step': 5050, 'epoch': 3} {'type': 'loss', 'content': 0.0002794148458633572, 'timestamp': '2025-09-10 02:31:08.171592', 'step': 5051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.200301', 'step': 5051, 'epoch': 3} {'type': 'loss', 'content': 0.0003587789833545685, 'timestamp': '2025-09-10 02:31:08.223616', 'step': 5052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:08.260027', 'step': 5052, 'epoch': 3} {'type': 'loss', 'content': 0.0004125781706534326, 'timestamp': '2025-09-10 02:31:08.262186', 'step': 5053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.291590', 'step': 5053, 'epoch': 3} {'type': 'loss', 'content': 0.0006383904255926609, 'timestamp': '2025-09-10 02:31:08.295424', 'step': 5054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.324982', 'step': 5054, 'epoch': 3} {'type': 'loss', 'content': 0.00018965710478369147, 'timestamp': '2025-09-10 02:31:08.326784', 'step': 5055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:08.355129', 'step': 5055, 'epoch': 3} {'type': 'loss', 'content': 0.0001069796271622181, 'timestamp': '2025-09-10 02:31:08.379819', 'step': 5056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.408780', 'step': 5056, 'epoch': 3} {'type': 'loss', 'content': 0.008336128666996956, 'timestamp': '2025-09-10 02:31:08.410834', 'step': 5057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.439485', 'step': 5057, 'epoch': 3} {'type': 'loss', 'content': 0.0013685652520507574, 'timestamp': '2025-09-10 02:31:08.441502', 'step': 5058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.469981', 'step': 5058, 'epoch': 3} {'type': 'loss', 'content': 0.0024607598315924406, 'timestamp': '2025-09-10 02:31:08.472033', 'step': 5059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.501167', 'step': 5059, 'epoch': 3} {'type': 'loss', 'content': 0.0005927715683355927, 'timestamp': '2025-09-10 02:31:08.524742', 'step': 5060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.555430', 'step': 5060, 'epoch': 3} {'type': 'loss', 'content': 0.0002251004771096632, 'timestamp': '2025-09-10 02:31:08.557287', 'step': 5061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.591780', 'step': 5061, 'epoch': 3} {'type': 'loss', 'content': 0.0005563314771279693, 'timestamp': '2025-09-10 02:31:08.594109', 'step': 5062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.622772', 'step': 5062, 'epoch': 3} {'type': 'loss', 'content': 0.0013570614391937852, 'timestamp': '2025-09-10 02:31:08.624570', 'step': 5063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.653133', 'step': 5063, 'epoch': 3} {'type': 'loss', 'content': 0.0001732300443109125, 'timestamp': '2025-09-10 02:31:08.676428', 'step': 5064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.705689', 'step': 5064, 'epoch': 3} {'type': 'loss', 'content': 0.00969749130308628, 'timestamp': '2025-09-10 02:31:08.707644', 'step': 5065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.736460', 'step': 5065, 'epoch': 3} {'type': 'loss', 'content': 0.00025736287352629006, 'timestamp': '2025-09-10 02:31:08.738393', 'step': 5066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.771364', 'step': 5066, 'epoch': 3} {'type': 'loss', 'content': 0.002629172755405307, 'timestamp': '2025-09-10 02:31:08.774514', 'step': 5067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.806106', 'step': 5067, 'epoch': 3} {'type': 'loss', 'content': 0.0007719130371697247, 'timestamp': '2025-09-10 02:31:08.829450', 'step': 5068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:08.858879', 'step': 5068, 'epoch': 3} {'type': 'loss', 'content': 0.0004081004299223423, 'timestamp': '2025-09-10 02:31:08.861702', 'step': 5069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.890626', 'step': 5069, 'epoch': 3} {'type': 'loss', 'content': 0.006041979882866144, 'timestamp': '2025-09-10 02:31:08.892532', 'step': 5070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.920999', 'step': 5070, 'epoch': 3} {'type': 'loss', 'content': 0.002837598556652665, 'timestamp': '2025-09-10 02:31:08.922989', 'step': 5071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:08.959416', 'step': 5071, 'epoch': 3} {'type': 'loss', 'content': 0.0034873499535024166, 'timestamp': '2025-09-10 02:31:08.983717', 'step': 5072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.013499', 'step': 5072, 'epoch': 3} {'type': 'loss', 'content': 0.0001958427019417286, 'timestamp': '2025-09-10 02:31:09.017855', 'step': 5073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:09.066465', 'step': 5073, 'epoch': 3} {'type': 'loss', 'content': 0.002457441296428442, 'timestamp': '2025-09-10 02:31:09.071913', 'step': 5074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.107319', 'step': 5074, 'epoch': 3} {'type': 'loss', 'content': 0.029114514589309692, 'timestamp': '2025-09-10 02:31:09.110183', 'step': 5075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.139123', 'step': 5075, 'epoch': 3} {'type': 'loss', 'content': 0.0008781243814155459, 'timestamp': '2025-09-10 02:31:09.162593', 'step': 5076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.191139', 'step': 5076, 'epoch': 3} {'type': 'loss', 'content': 0.0006751882610842586, 'timestamp': '2025-09-10 02:31:09.193204', 'step': 5077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.221614', 'step': 5077, 'epoch': 3} {'type': 'loss', 'content': 0.0052112252451479435, 'timestamp': '2025-09-10 02:31:09.223384', 'step': 5078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.252144', 'step': 5078, 'epoch': 3} {'type': 'loss', 'content': 0.02491091564297676, 'timestamp': '2025-09-10 02:31:09.255062', 'step': 5079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.285277', 'step': 5079, 'epoch': 3} {'type': 'loss', 'content': 0.03170705586671829, 'timestamp': '2025-09-10 02:31:09.308787', 'step': 5080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.344060', 'step': 5080, 'epoch': 3} {'type': 'loss', 'content': 0.00020365203090477735, 'timestamp': '2025-09-10 02:31:09.345952', 'step': 5081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.374932', 'step': 5081, 'epoch': 3} {'type': 'loss', 'content': 0.0001544384576845914, 'timestamp': '2025-09-10 02:31:09.378225', 'step': 5082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.409239', 'step': 5082, 'epoch': 3} {'type': 'loss', 'content': 0.019298970699310303, 'timestamp': '2025-09-10 02:31:09.411028', 'step': 5083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.439527', 'step': 5083, 'epoch': 3} {'type': 'loss', 'content': 0.0001749621151247993, 'timestamp': '2025-09-10 02:31:09.462809', 'step': 5084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:09.491707', 'step': 5084, 'epoch': 3} {'type': 'loss', 'content': 0.000501964648719877, 'timestamp': '2025-09-10 02:31:09.493501', 'step': 5085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.521884', 'step': 5085, 'epoch': 3} {'type': 'loss', 'content': 0.0014790799468755722, 'timestamp': '2025-09-10 02:31:09.523935', 'step': 5086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:09.552743', 'step': 5086, 'epoch': 3} {'type': 'loss', 'content': 0.0002753040171228349, 'timestamp': '2025-09-10 02:31:09.554508', 'step': 5087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.583298', 'step': 5087, 'epoch': 3} {'type': 'loss', 'content': 0.002824705094099045, 'timestamp': '2025-09-10 02:31:09.607020', 'step': 5088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.635865', 'step': 5088, 'epoch': 3} {'type': 'loss', 'content': 0.0007388076628558338, 'timestamp': '2025-09-10 02:31:09.638008', 'step': 5089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.668691', 'step': 5089, 'epoch': 3} {'type': 'loss', 'content': 0.0011144352611154318, 'timestamp': '2025-09-10 02:31:09.670409', 'step': 5090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.699367', 'step': 5090, 'epoch': 3} {'type': 'loss', 'content': 0.01142920646816492, 'timestamp': '2025-09-10 02:31:09.702213', 'step': 5091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.731067', 'step': 5091, 'epoch': 3} {'type': 'loss', 'content': 0.06258725374937057, 'timestamp': '2025-09-10 02:31:09.754622', 'step': 5092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:09.784167', 'step': 5092, 'epoch': 3} {'type': 'loss', 'content': 0.0002580071159172803, 'timestamp': '2025-09-10 02:31:09.786122', 'step': 5093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.815970', 'step': 5093, 'epoch': 3} {'type': 'loss', 'content': 7.947625999804586e-05, 'timestamp': '2025-09-10 02:31:09.819740', 'step': 5094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.851858', 'step': 5094, 'epoch': 3} {'type': 'loss', 'content': 0.012915769591927528, 'timestamp': '2025-09-10 02:31:09.853989', 'step': 5095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.887870', 'step': 5095, 'epoch': 3} {'type': 'loss', 'content': 0.040144938975572586, 'timestamp': '2025-09-10 02:31:09.911380', 'step': 5096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:09.940619', 'step': 5096, 'epoch': 3} {'type': 'loss', 'content': 0.05270438641309738, 'timestamp': '2025-09-10 02:31:09.942549', 'step': 5097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:09.974571', 'step': 5097, 'epoch': 3} {'type': 'loss', 'content': 0.02041652984917164, 'timestamp': '2025-09-10 02:31:09.976535', 'step': 5098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:10.009654', 'step': 5098, 'epoch': 3} {'type': 'loss', 'content': 0.016050977632403374, 'timestamp': '2025-09-10 02:31:10.012133', 'step': 5099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.051437', 'step': 5099, 'epoch': 3} {'type': 'loss', 'content': 0.001969614764675498, 'timestamp': '2025-09-10 02:31:10.075030', 'step': 5100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:10.104149', 'step': 5100, 'epoch': 3} {'type': 'loss', 'content': 0.0016167467692866921, 'timestamp': '2025-09-10 02:31:10.106029', 'step': 5101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.134850', 'step': 5101, 'epoch': 3} {'type': 'loss', 'content': 0.0035023626405745745, 'timestamp': '2025-09-10 02:31:10.136730', 'step': 5102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.167536', 'step': 5102, 'epoch': 3} {'type': 'loss', 'content': 0.0002871211036108434, 'timestamp': '2025-09-10 02:31:10.170108', 'step': 5103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:10.202275', 'step': 5103, 'epoch': 3} {'type': 'loss', 'content': 0.00033675372833386064, 'timestamp': '2025-09-10 02:31:10.225964', 'step': 5104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.259639', 'step': 5104, 'epoch': 3} {'type': 'loss', 'content': 0.008695071563124657, 'timestamp': '2025-09-10 02:31:10.263836', 'step': 5105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.292888', 'step': 5105, 'epoch': 3} {'type': 'loss', 'content': 0.017227793112397194, 'timestamp': '2025-09-10 02:31:10.294960', 'step': 5106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:10.324198', 'step': 5106, 'epoch': 3} {'type': 'loss', 'content': 0.0003940062306355685, 'timestamp': '2025-09-10 02:31:10.326225', 'step': 5107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.355060', 'step': 5107, 'epoch': 3} {'type': 'loss', 'content': 0.0002077590033877641, 'timestamp': '2025-09-10 02:31:10.378658', 'step': 5108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:10.409881', 'step': 5108, 'epoch': 3} {'type': 'loss', 'content': 0.0076890503987669945, 'timestamp': '2025-09-10 02:31:10.412865', 'step': 5109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.441878', 'step': 5109, 'epoch': 3} {'type': 'loss', 'content': 0.022206833586096764, 'timestamp': '2025-09-10 02:31:10.444831', 'step': 5110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.476159', 'step': 5110, 'epoch': 3} {'type': 'loss', 'content': 0.008559671230614185, 'timestamp': '2025-09-10 02:31:10.478122', 'step': 5111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.507470', 'step': 5111, 'epoch': 3} {'type': 'loss', 'content': 0.0028873595874756575, 'timestamp': '2025-09-10 02:31:10.533094', 'step': 5112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.562208', 'step': 5112, 'epoch': 3} {'type': 'loss', 'content': 9.522762411506847e-05, 'timestamp': '2025-09-10 02:31:10.564838', 'step': 5113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.593889', 'step': 5113, 'epoch': 3} {'type': 'loss', 'content': 0.004308689851313829, 'timestamp': '2025-09-10 02:31:10.596276', 'step': 5114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:10.626008', 'step': 5114, 'epoch': 3} {'type': 'loss', 'content': 0.002052604453638196, 'timestamp': '2025-09-10 02:31:10.628078', 'step': 5115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.657320', 'step': 5115, 'epoch': 3} {'type': 'loss', 'content': 0.02458536997437477, 'timestamp': '2025-09-10 02:31:10.680984', 'step': 5116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.711612', 'step': 5116, 'epoch': 3} {'type': 'loss', 'content': 0.023380303755402565, 'timestamp': '2025-09-10 02:31:10.713857', 'step': 5117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.754490', 'step': 5117, 'epoch': 3} {'type': 'loss', 'content': 0.05575161799788475, 'timestamp': '2025-09-10 02:31:10.756618', 'step': 5118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.785358', 'step': 5118, 'epoch': 3} {'type': 'loss', 'content': 0.0006170102278701961, 'timestamp': '2025-09-10 02:31:10.788518', 'step': 5119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:10.819088', 'step': 5119, 'epoch': 3} {'type': 'loss', 'content': 0.021255100145936012, 'timestamp': '2025-09-10 02:31:10.842873', 'step': 5120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.873048', 'step': 5120, 'epoch': 3} {'type': 'loss', 'content': 0.0035779413301497698, 'timestamp': '2025-09-10 02:31:10.874938', 'step': 5121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.905833', 'step': 5121, 'epoch': 3} {'type': 'loss', 'content': 0.0019906642846763134, 'timestamp': '2025-09-10 02:31:10.908605', 'step': 5122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:10.937104', 'step': 5122, 'epoch': 3} {'type': 'loss', 'content': 0.023533621802926064, 'timestamp': '2025-09-10 02:31:10.940226', 'step': 5123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:10.968752', 'step': 5123, 'epoch': 3} {'type': 'loss', 'content': 0.007398799993097782, 'timestamp': '2025-09-10 02:31:10.992600', 'step': 5124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.022150', 'step': 5124, 'epoch': 3} {'type': 'loss', 'content': 0.06380916386842728, 'timestamp': '2025-09-10 02:31:11.024219', 'step': 5125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.061488', 'step': 5125, 'epoch': 3} {'type': 'loss', 'content': 0.0009983654599636793, 'timestamp': '2025-09-10 02:31:11.063442', 'step': 5126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.093207', 'step': 5126, 'epoch': 3} {'type': 'loss', 'content': 0.005230212118476629, 'timestamp': '2025-09-10 02:31:11.095166', 'step': 5127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.124358', 'step': 5127, 'epoch': 3} {'type': 'loss', 'content': 0.0027204251382499933, 'timestamp': '2025-09-10 02:31:11.149679', 'step': 5128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.181224', 'step': 5128, 'epoch': 3} {'type': 'loss', 'content': 0.004974003881216049, 'timestamp': '2025-09-10 02:31:11.183023', 'step': 5129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.212086', 'step': 5129, 'epoch': 3} {'type': 'loss', 'content': 0.0012775195064023137, 'timestamp': '2025-09-10 02:31:11.214239', 'step': 5130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.243816', 'step': 5130, 'epoch': 3} {'type': 'loss', 'content': 0.001391895697452128, 'timestamp': '2025-09-10 02:31:11.252463', 'step': 5131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.282080', 'step': 5131, 'epoch': 3} {'type': 'loss', 'content': 0.028196966275572777, 'timestamp': '2025-09-10 02:31:11.305462', 'step': 5132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.335006', 'step': 5132, 'epoch': 3} {'type': 'loss', 'content': 0.02282044105231762, 'timestamp': '2025-09-10 02:31:11.337981', 'step': 5133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.369711', 'step': 5133, 'epoch': 3} {'type': 'loss', 'content': 0.014931939542293549, 'timestamp': '2025-09-10 02:31:11.371625', 'step': 5134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.404273', 'step': 5134, 'epoch': 3} {'type': 'loss', 'content': 0.01002415083348751, 'timestamp': '2025-09-10 02:31:11.406092', 'step': 5135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.436762', 'step': 5135, 'epoch': 3} {'type': 'loss', 'content': 0.0012437284458428621, 'timestamp': '2025-09-10 02:31:11.460387', 'step': 5136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.492270', 'step': 5136, 'epoch': 3} {'type': 'loss', 'content': 0.002360876649618149, 'timestamp': '2025-09-10 02:31:11.494335', 'step': 5137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.523343', 'step': 5137, 'epoch': 3} {'type': 'loss', 'content': 0.004048882517963648, 'timestamp': '2025-09-10 02:31:11.525120', 'step': 5138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.553866', 'step': 5138, 'epoch': 3} {'type': 'loss', 'content': 0.000911208160687238, 'timestamp': '2025-09-10 02:31:11.556320', 'step': 5139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.585475', 'step': 5139, 'epoch': 3} {'type': 'loss', 'content': 0.0005161958979442716, 'timestamp': '2025-09-10 02:31:11.612272', 'step': 5140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.641484', 'step': 5140, 'epoch': 3} {'type': 'loss', 'content': 0.013862027786672115, 'timestamp': '2025-09-10 02:31:11.643450', 'step': 5141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.672326', 'step': 5141, 'epoch': 3} {'type': 'loss', 'content': 0.004430785309523344, 'timestamp': '2025-09-10 02:31:11.674396', 'step': 5142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.703388', 'step': 5142, 'epoch': 3} {'type': 'loss', 'content': 0.001230349880643189, 'timestamp': '2025-09-10 02:31:11.706306', 'step': 5143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.742350', 'step': 5143, 'epoch': 3} {'type': 'loss', 'content': 0.0032119769603013992, 'timestamp': '2025-09-10 02:31:11.766221', 'step': 5144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:11.796389', 'step': 5144, 'epoch': 3} {'type': 'loss', 'content': 0.00026887355488725007, 'timestamp': '2025-09-10 02:31:11.798663', 'step': 5145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.827906', 'step': 5145, 'epoch': 3} {'type': 'loss', 'content': 0.011979511938989162, 'timestamp': '2025-09-10 02:31:11.830033', 'step': 5146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.860555', 'step': 5146, 'epoch': 3} {'type': 'loss', 'content': 0.020315438508987427, 'timestamp': '2025-09-10 02:31:11.862697', 'step': 5147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:11.891342', 'step': 5147, 'epoch': 3} {'type': 'loss', 'content': 0.004353826399892569, 'timestamp': '2025-09-10 02:31:11.914762', 'step': 5148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:11.943945', 'step': 5148, 'epoch': 3} {'type': 'loss', 'content': 0.0077579873614013195, 'timestamp': '2025-09-10 02:31:11.945921', 'step': 5149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:11.974708', 'step': 5149, 'epoch': 3} {'type': 'loss', 'content': 0.050322677940130234, 'timestamp': '2025-09-10 02:31:11.976785', 'step': 5150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.007413', 'step': 5150, 'epoch': 3} {'type': 'loss', 'content': 0.010600102134048939, 'timestamp': '2025-09-10 02:31:12.009507', 'step': 5151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.038723', 'step': 5151, 'epoch': 3} {'type': 'loss', 'content': 0.001150781405158341, 'timestamp': '2025-09-10 02:31:12.064258', 'step': 5152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.093636', 'step': 5152, 'epoch': 3} {'type': 'loss', 'content': 0.015882886946201324, 'timestamp': '2025-09-10 02:31:12.095299', 'step': 5153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.124352', 'step': 5153, 'epoch': 3} {'type': 'loss', 'content': 0.038372162729501724, 'timestamp': '2025-09-10 02:31:12.126389', 'step': 5154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.155437', 'step': 5154, 'epoch': 3} {'type': 'loss', 'content': 0.005249445792287588, 'timestamp': '2025-09-10 02:31:12.160743', 'step': 5155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.194367', 'step': 5155, 'epoch': 3} {'type': 'loss', 'content': 0.01985199749469757, 'timestamp': '2025-09-10 02:31:12.225728', 'step': 5156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.254612', 'step': 5156, 'epoch': 3} {'type': 'loss', 'content': 0.0048411195166409016, 'timestamp': '2025-09-10 02:31:12.256600', 'step': 5157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.285917', 'step': 5157, 'epoch': 3} {'type': 'loss', 'content': 0.000420969765400514, 'timestamp': '2025-09-10 02:31:12.287665', 'step': 5158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.316673', 'step': 5158, 'epoch': 3} {'type': 'loss', 'content': 0.0017089421162381768, 'timestamp': '2025-09-10 02:31:12.318529', 'step': 5159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.347811', 'step': 5159, 'epoch': 3} {'type': 'loss', 'content': 0.0075353109277784824, 'timestamp': '2025-09-10 02:31:12.371468', 'step': 5160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.402929', 'step': 5160, 'epoch': 3} {'type': 'loss', 'content': 0.002512335777282715, 'timestamp': '2025-09-10 02:31:12.405393', 'step': 5161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.435661', 'step': 5161, 'epoch': 3} {'type': 'loss', 'content': 0.01650133542716503, 'timestamp': '2025-09-10 02:31:12.437789', 'step': 5162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.467321', 'step': 5162, 'epoch': 3} {'type': 'loss', 'content': 0.0005163501482456923, 'timestamp': '2025-09-10 02:31:12.469340', 'step': 5163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.498776', 'step': 5163, 'epoch': 3} {'type': 'loss', 'content': 0.000299501174595207, 'timestamp': '2025-09-10 02:31:12.522170', 'step': 5164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.554587', 'step': 5164, 'epoch': 3} {'type': 'loss', 'content': 0.0024083254393190145, 'timestamp': '2025-09-10 02:31:12.556561', 'step': 5165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.585640', 'step': 5165, 'epoch': 3} {'type': 'loss', 'content': 0.023579344153404236, 'timestamp': '2025-09-10 02:31:12.587683', 'step': 5166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:12.616248', 'step': 5166, 'epoch': 3} {'type': 'loss', 'content': 0.0071758790872991085, 'timestamp': '2025-09-10 02:31:12.618218', 'step': 5167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:12.646592', 'step': 5167, 'epoch': 3} {'type': 'loss', 'content': 0.013538946397602558, 'timestamp': '2025-09-10 02:31:12.670222', 'step': 5168, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:31:15.226932', 'step': 5168, 'epoch': 3} {'type': 'pplx', 'content': 2436733.3643380217, 'timestamp': '2025-09-10 02:31:15.228967', 'step': 5168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.260803', 'step': 5168, 'epoch': 3} {'type': 'loss', 'content': 0.00024558056611567736, 'timestamp': '2025-09-10 02:31:15.262899', 'step': 5169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.292171', 'step': 5169, 'epoch': 3} {'type': 'loss', 'content': 0.0008377385092899203, 'timestamp': '2025-09-10 02:31:15.296023', 'step': 5170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.325645', 'step': 5170, 'epoch': 3} {'type': 'loss', 'content': 0.016198571771383286, 'timestamp': '2025-09-10 02:31:15.328746', 'step': 5171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.359603', 'step': 5171, 'epoch': 3} {'type': 'loss', 'content': 0.018108924850821495, 'timestamp': '2025-09-10 02:31:15.384143', 'step': 5172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.413231', 'step': 5172, 'epoch': 3} {'type': 'loss', 'content': 0.0005432538455352187, 'timestamp': '2025-09-10 02:31:15.415305', 'step': 5173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.444223', 'step': 5173, 'epoch': 3} {'type': 'loss', 'content': 0.0016497739125043154, 'timestamp': '2025-09-10 02:31:15.446101', 'step': 5174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.474950', 'step': 5174, 'epoch': 3} {'type': 'loss', 'content': 0.018692251294851303, 'timestamp': '2025-09-10 02:31:15.476870', 'step': 5175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.505903', 'step': 5175, 'epoch': 3} {'type': 'loss', 'content': 0.0012261820957064629, 'timestamp': '2025-09-10 02:31:15.531316', 'step': 5176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.560387', 'step': 5176, 'epoch': 3} {'type': 'loss', 'content': 0.01112561859190464, 'timestamp': '2025-09-10 02:31:15.562448', 'step': 5177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.592180', 'step': 5177, 'epoch': 3} {'type': 'loss', 'content': 0.012058419175446033, 'timestamp': '2025-09-10 02:31:15.594155', 'step': 5178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.624368', 'step': 5178, 'epoch': 3} {'type': 'loss', 'content': 0.0004496954788919538, 'timestamp': '2025-09-10 02:31:15.626189', 'step': 5179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.654933', 'step': 5179, 'epoch': 3} {'type': 'loss', 'content': 0.0007760432199575007, 'timestamp': '2025-09-10 02:31:15.678476', 'step': 5180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.708930', 'step': 5180, 'epoch': 3} {'type': 'loss', 'content': 0.0017544291913509369, 'timestamp': '2025-09-10 02:31:15.710981', 'step': 5181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.742898', 'step': 5181, 'epoch': 3} {'type': 'loss', 'content': 0.016934078186750412, 'timestamp': '2025-09-10 02:31:15.744876', 'step': 5182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.774106', 'step': 5182, 'epoch': 3} {'type': 'loss', 'content': 0.0005306123639456928, 'timestamp': '2025-09-10 02:31:15.776034', 'step': 5183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.804856', 'step': 5183, 'epoch': 3} {'type': 'loss', 'content': 0.03523048013448715, 'timestamp': '2025-09-10 02:31:15.829350', 'step': 5184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.858363', 'step': 5184, 'epoch': 3} {'type': 'loss', 'content': 0.00044931474258191884, 'timestamp': '2025-09-10 02:31:15.861656', 'step': 5185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:15.893036', 'step': 5185, 'epoch': 3} {'type': 'loss', 'content': 0.0012647055555135012, 'timestamp': '2025-09-10 02:31:15.894987', 'step': 5186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.923710', 'step': 5186, 'epoch': 3} {'type': 'loss', 'content': 0.00022437133884523064, 'timestamp': '2025-09-10 02:31:15.925585', 'step': 5187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:15.953947', 'step': 5187, 'epoch': 3} {'type': 'loss', 'content': 0.00028958715847693384, 'timestamp': '2025-09-10 02:31:15.977244', 'step': 5188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:16.006262', 'step': 5188, 'epoch': 3} {'type': 'loss', 'content': 0.0008154022507369518, 'timestamp': '2025-09-10 02:31:16.013030', 'step': 5189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.045272', 'step': 5189, 'epoch': 3} {'type': 'loss', 'content': 0.003911999054253101, 'timestamp': '2025-09-10 02:31:16.047375', 'step': 5190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.077052', 'step': 5190, 'epoch': 3} {'type': 'loss', 'content': 0.00020245795894879848, 'timestamp': '2025-09-10 02:31:16.079111', 'step': 5191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.107901', 'step': 5191, 'epoch': 3} {'type': 'loss', 'content': 0.0006830912898294628, 'timestamp': '2025-09-10 02:31:16.131569', 'step': 5192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.160397', 'step': 5192, 'epoch': 3} {'type': 'loss', 'content': 0.000709925836417824, 'timestamp': '2025-09-10 02:31:16.162461', 'step': 5193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.191699', 'step': 5193, 'epoch': 3} {'type': 'loss', 'content': 0.008646724745631218, 'timestamp': '2025-09-10 02:31:16.193759', 'step': 5194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:16.222866', 'step': 5194, 'epoch': 3} {'type': 'loss', 'content': 0.00067809788743034, 'timestamp': '2025-09-10 02:31:16.224883', 'step': 5195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.253496', 'step': 5195, 'epoch': 3} {'type': 'loss', 'content': 0.00020860283984802663, 'timestamp': '2025-09-10 02:31:16.277025', 'step': 5196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:16.306117', 'step': 5196, 'epoch': 3} {'type': 'loss', 'content': 0.007869458757340908, 'timestamp': '2025-09-10 02:31:16.308061', 'step': 5197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.337096', 'step': 5197, 'epoch': 3} {'type': 'loss', 'content': 0.0542362816631794, 'timestamp': '2025-09-10 02:31:16.339078', 'step': 5198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.367853', 'step': 5198, 'epoch': 3} {'type': 'loss', 'content': 0.007059518247842789, 'timestamp': '2025-09-10 02:31:16.369810', 'step': 5199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.398806', 'step': 5199, 'epoch': 3} {'type': 'loss', 'content': 0.0003244362014811486, 'timestamp': '2025-09-10 02:31:16.422496', 'step': 5200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.452339', 'step': 5200, 'epoch': 3} {'type': 'loss', 'content': 0.013305050320923328, 'timestamp': '2025-09-10 02:31:16.454187', 'step': 5201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.482981', 'step': 5201, 'epoch': 3} {'type': 'loss', 'content': 0.015022998675704002, 'timestamp': '2025-09-10 02:31:16.484794', 'step': 5202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.515009', 'step': 5202, 'epoch': 3} {'type': 'loss', 'content': 0.0001668601034907624, 'timestamp': '2025-09-10 02:31:16.516846', 'step': 5203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.551679', 'step': 5203, 'epoch': 3} {'type': 'loss', 'content': 0.00025806803023442626, 'timestamp': '2025-09-10 02:31:16.575165', 'step': 5204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.603895', 'step': 5204, 'epoch': 3} {'type': 'loss', 'content': 0.013605805113911629, 'timestamp': '2025-09-10 02:31:16.605943', 'step': 5205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.635001', 'step': 5205, 'epoch': 3} {'type': 'loss', 'content': 0.04866912215948105, 'timestamp': '2025-09-10 02:31:16.636837', 'step': 5206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.665226', 'step': 5206, 'epoch': 3} {'type': 'loss', 'content': 0.0006265775300562382, 'timestamp': '2025-09-10 02:31:16.667115', 'step': 5207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.696538', 'step': 5207, 'epoch': 3} {'type': 'loss', 'content': 0.008275379426777363, 'timestamp': '2025-09-10 02:31:16.720946', 'step': 5208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.751174', 'step': 5208, 'epoch': 3} {'type': 'loss', 'content': 0.006404665298759937, 'timestamp': '2025-09-10 02:31:16.753020', 'step': 5209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.781809', 'step': 5209, 'epoch': 3} {'type': 'loss', 'content': 0.053043730556964874, 'timestamp': '2025-09-10 02:31:16.789799', 'step': 5210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.819930', 'step': 5210, 'epoch': 3} {'type': 'loss', 'content': 0.007864592596888542, 'timestamp': '2025-09-10 02:31:16.822660', 'step': 5211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:16.852152', 'step': 5211, 'epoch': 3} {'type': 'loss', 'content': 0.0004012619028799236, 'timestamp': '2025-09-10 02:31:16.875610', 'step': 5212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.906105', 'step': 5212, 'epoch': 3} {'type': 'loss', 'content': 0.00010462481441209093, 'timestamp': '2025-09-10 02:31:16.907983', 'step': 5213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.936564', 'step': 5213, 'epoch': 3} {'type': 'loss', 'content': 0.002815719461068511, 'timestamp': '2025-09-10 02:31:16.938555', 'step': 5214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:16.967324', 'step': 5214, 'epoch': 3} {'type': 'loss', 'content': 0.003537782933562994, 'timestamp': '2025-09-10 02:31:16.969160', 'step': 5215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.004696', 'step': 5215, 'epoch': 3} {'type': 'loss', 'content': 0.005013471934944391, 'timestamp': '2025-09-10 02:31:17.028151', 'step': 5216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:17.057071', 'step': 5216, 'epoch': 3} {'type': 'loss', 'content': 0.0006305575370788574, 'timestamp': '2025-09-10 02:31:17.059579', 'step': 5217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.090166', 'step': 5217, 'epoch': 3} {'type': 'loss', 'content': 0.015557162463665009, 'timestamp': '2025-09-10 02:31:17.092141', 'step': 5218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.122354', 'step': 5218, 'epoch': 3} {'type': 'loss', 'content': 0.005185294430702925, 'timestamp': '2025-09-10 02:31:17.124241', 'step': 5219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.153459', 'step': 5219, 'epoch': 3} {'type': 'loss', 'content': 0.0010910272831097245, 'timestamp': '2025-09-10 02:31:17.179637', 'step': 5220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.209356', 'step': 5220, 'epoch': 3} {'type': 'loss', 'content': 0.00012062443420290947, 'timestamp': '2025-09-10 02:31:17.211365', 'step': 5221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.245458', 'step': 5221, 'epoch': 3} {'type': 'loss', 'content': 0.005589938256889582, 'timestamp': '2025-09-10 02:31:17.247404', 'step': 5222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.275962', 'step': 5222, 'epoch': 3} {'type': 'loss', 'content': 0.01552930474281311, 'timestamp': '2025-09-10 02:31:17.283244', 'step': 5223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.315771', 'step': 5223, 'epoch': 3} {'type': 'loss', 'content': 0.0014602139126509428, 'timestamp': '2025-09-10 02:31:17.339146', 'step': 5224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.373490', 'step': 5224, 'epoch': 3} {'type': 'loss', 'content': 0.0019929243717342615, 'timestamp': '2025-09-10 02:31:17.375354', 'step': 5225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.404301', 'step': 5225, 'epoch': 3} {'type': 'loss', 'content': 0.007471051067113876, 'timestamp': '2025-09-10 02:31:17.406404', 'step': 5226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:17.435291', 'step': 5226, 'epoch': 3} {'type': 'loss', 'content': 0.003584515769034624, 'timestamp': '2025-09-10 02:31:17.437216', 'step': 5227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.465910', 'step': 5227, 'epoch': 3} {'type': 'loss', 'content': 0.007189448922872543, 'timestamp': '2025-09-10 02:31:17.490065', 'step': 5228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.519098', 'step': 5228, 'epoch': 3} {'type': 'loss', 'content': 0.0427686981856823, 'timestamp': '2025-09-10 02:31:17.521138', 'step': 5229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.553698', 'step': 5229, 'epoch': 3} {'type': 'loss', 'content': 0.0003127622476313263, 'timestamp': '2025-09-10 02:31:17.555539', 'step': 5230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.584885', 'step': 5230, 'epoch': 3} {'type': 'loss', 'content': 0.00016666494775563478, 'timestamp': '2025-09-10 02:31:17.588150', 'step': 5231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.620373', 'step': 5231, 'epoch': 3} {'type': 'loss', 'content': 0.00034709740430116653, 'timestamp': '2025-09-10 02:31:17.643883', 'step': 5232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.673210', 'step': 5232, 'epoch': 3} {'type': 'loss', 'content': 0.0011999139096587896, 'timestamp': '2025-09-10 02:31:17.679007', 'step': 5233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.710817', 'step': 5233, 'epoch': 3} {'type': 'loss', 'content': 0.004059411119669676, 'timestamp': '2025-09-10 02:31:17.712955', 'step': 5234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:17.741992', 'step': 5234, 'epoch': 3} {'type': 'loss', 'content': 4.660410922951996e-05, 'timestamp': '2025-09-10 02:31:17.745814', 'step': 5235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.776182', 'step': 5235, 'epoch': 3} {'type': 'loss', 'content': 0.0003501230094116181, 'timestamp': '2025-09-10 02:31:17.800536', 'step': 5236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.840391', 'step': 5236, 'epoch': 3} {'type': 'loss', 'content': 0.004818799439817667, 'timestamp': '2025-09-10 02:31:17.843662', 'step': 5237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.877827', 'step': 5237, 'epoch': 3} {'type': 'loss', 'content': 0.007570463698357344, 'timestamp': '2025-09-10 02:31:17.881350', 'step': 5238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.913806', 'step': 5238, 'epoch': 3} {'type': 'loss', 'content': 0.0011491699842736125, 'timestamp': '2025-09-10 02:31:17.918251', 'step': 5239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:17.957604', 'step': 5239, 'epoch': 3} {'type': 'loss', 'content': 0.0004111050220672041, 'timestamp': '2025-09-10 02:31:17.981099', 'step': 5240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.010200', 'step': 5240, 'epoch': 3} {'type': 'loss', 'content': 0.0013137703062966466, 'timestamp': '2025-09-10 02:31:18.015201', 'step': 5241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.047185', 'step': 5241, 'epoch': 3} {'type': 'loss', 'content': 0.008169819600880146, 'timestamp': '2025-09-10 02:31:18.049166', 'step': 5242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.080079', 'step': 5242, 'epoch': 3} {'type': 'loss', 'content': 0.02304341271519661, 'timestamp': '2025-09-10 02:31:18.082139', 'step': 5243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.111525', 'step': 5243, 'epoch': 3} {'type': 'loss', 'content': 0.0034230987075716257, 'timestamp': '2025-09-10 02:31:18.135008', 'step': 5244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.168534', 'step': 5244, 'epoch': 3} {'type': 'loss', 'content': 7.703209848841652e-05, 'timestamp': '2025-09-10 02:31:18.170437', 'step': 5245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.199407', 'step': 5245, 'epoch': 3} {'type': 'loss', 'content': 0.00011362635996192694, 'timestamp': '2025-09-10 02:31:18.201336', 'step': 5246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.231155', 'step': 5246, 'epoch': 3} {'type': 'loss', 'content': 0.0002412385365460068, 'timestamp': '2025-09-10 02:31:18.233207', 'step': 5247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.262069', 'step': 5247, 'epoch': 3} {'type': 'loss', 'content': 0.003811098635196686, 'timestamp': '2025-09-10 02:31:18.285668', 'step': 5248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.314575', 'step': 5248, 'epoch': 3} {'type': 'loss', 'content': 0.0002703253994695842, 'timestamp': '2025-09-10 02:31:18.317087', 'step': 5249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.345929', 'step': 5249, 'epoch': 3} {'type': 'loss', 'content': 0.00048442385741509497, 'timestamp': '2025-09-10 02:31:18.349944', 'step': 5250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.378979', 'step': 5250, 'epoch': 3} {'type': 'loss', 'content': 0.00034568950650282204, 'timestamp': '2025-09-10 02:31:18.380940', 'step': 5251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.409402', 'step': 5251, 'epoch': 3} {'type': 'loss', 'content': 0.00010054224549094215, 'timestamp': '2025-09-10 02:31:18.432673', 'step': 5252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.464971', 'step': 5252, 'epoch': 3} {'type': 'loss', 'content': 0.002652938012033701, 'timestamp': '2025-09-10 02:31:18.466848', 'step': 5253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.495717', 'step': 5253, 'epoch': 3} {'type': 'loss', 'content': 0.00024533795658499, 'timestamp': '2025-09-10 02:31:18.497892', 'step': 5254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.530480', 'step': 5254, 'epoch': 3} {'type': 'loss', 'content': 0.00039591133827343583, 'timestamp': '2025-09-10 02:31:18.532446', 'step': 5255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.561769', 'step': 5255, 'epoch': 3} {'type': 'loss', 'content': 0.0004582552064675838, 'timestamp': '2025-09-10 02:31:18.585207', 'step': 5256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:18.614450', 'step': 5256, 'epoch': 3} {'type': 'loss', 'content': 0.002351920586079359, 'timestamp': '2025-09-10 02:31:18.616447', 'step': 5257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.645554', 'step': 5257, 'epoch': 3} {'type': 'loss', 'content': 0.0006441808654926717, 'timestamp': '2025-09-10 02:31:18.647553', 'step': 5258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.676357', 'step': 5258, 'epoch': 3} {'type': 'loss', 'content': 0.0015084192855283618, 'timestamp': '2025-09-10 02:31:18.678260', 'step': 5259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.707329', 'step': 5259, 'epoch': 3} {'type': 'loss', 'content': 0.007480281870812178, 'timestamp': '2025-09-10 02:31:18.730777', 'step': 5260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.765026', 'step': 5260, 'epoch': 3} {'type': 'loss', 'content': 0.0005572711233980954, 'timestamp': '2025-09-10 02:31:18.767053', 'step': 5261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.795958', 'step': 5261, 'epoch': 3} {'type': 'loss', 'content': 0.00013719707203563303, 'timestamp': '2025-09-10 02:31:18.797997', 'step': 5262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.827324', 'step': 5262, 'epoch': 3} {'type': 'loss', 'content': 0.002512450562790036, 'timestamp': '2025-09-10 02:31:18.830934', 'step': 5263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:18.863313', 'step': 5263, 'epoch': 3} {'type': 'loss', 'content': 0.0021838522516191006, 'timestamp': '2025-09-10 02:31:18.888710', 'step': 5264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.918165', 'step': 5264, 'epoch': 3} {'type': 'loss', 'content': 0.0029657420236617327, 'timestamp': '2025-09-10 02:31:18.920329', 'step': 5265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:18.953243', 'step': 5265, 'epoch': 3} {'type': 'loss', 'content': 0.00030763083486817777, 'timestamp': '2025-09-10 02:31:18.956482', 'step': 5266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:18.986491', 'step': 5266, 'epoch': 3} {'type': 'loss', 'content': 0.004049286246299744, 'timestamp': '2025-09-10 02:31:18.988433', 'step': 5267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.017144', 'step': 5267, 'epoch': 3} {'type': 'loss', 'content': 7.353690307354555e-05, 'timestamp': '2025-09-10 02:31:19.041754', 'step': 5268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.070961', 'step': 5268, 'epoch': 3} {'type': 'loss', 'content': 7.854583964217454e-05, 'timestamp': '2025-09-10 02:31:19.073049', 'step': 5269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.102430', 'step': 5269, 'epoch': 3} {'type': 'loss', 'content': 0.0026422517839819193, 'timestamp': '2025-09-10 02:31:19.104551', 'step': 5270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.133378', 'step': 5270, 'epoch': 3} {'type': 'loss', 'content': 0.014817780815064907, 'timestamp': '2025-09-10 02:31:19.135324', 'step': 5271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.164117', 'step': 5271, 'epoch': 3} {'type': 'loss', 'content': 0.001545860548503697, 'timestamp': '2025-09-10 02:31:19.187985', 'step': 5272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.217188', 'step': 5272, 'epoch': 3} {'type': 'loss', 'content': 0.0019337693229317665, 'timestamp': '2025-09-10 02:31:19.219435', 'step': 5273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.248426', 'step': 5273, 'epoch': 3} {'type': 'loss', 'content': 0.00020618803682737052, 'timestamp': '2025-09-10 02:31:19.250457', 'step': 5274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.279890', 'step': 5274, 'epoch': 3} {'type': 'loss', 'content': 0.00018796950462274253, 'timestamp': '2025-09-10 02:31:19.282053', 'step': 5275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.311019', 'step': 5275, 'epoch': 3} {'type': 'loss', 'content': 0.0075276815332472324, 'timestamp': '2025-09-10 02:31:19.334589', 'step': 5276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:19.364301', 'step': 5276, 'epoch': 3} {'type': 'loss', 'content': 0.017516721040010452, 'timestamp': '2025-09-10 02:31:19.366156', 'step': 5277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.394787', 'step': 5277, 'epoch': 3} {'type': 'loss', 'content': 0.00011237651779083535, 'timestamp': '2025-09-10 02:31:19.396829', 'step': 5278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.425818', 'step': 5278, 'epoch': 3} {'type': 'loss', 'content': 0.00078305829083547, 'timestamp': '2025-09-10 02:31:19.427429', 'step': 5279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.456639', 'step': 5279, 'epoch': 3} {'type': 'loss', 'content': 0.01742679253220558, 'timestamp': '2025-09-10 02:31:19.481385', 'step': 5280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.510918', 'step': 5280, 'epoch': 3} {'type': 'loss', 'content': 0.006091821938753128, 'timestamp': '2025-09-10 02:31:19.512717', 'step': 5281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.541618', 'step': 5281, 'epoch': 3} {'type': 'loss', 'content': 0.01619417779147625, 'timestamp': '2025-09-10 02:31:19.543826', 'step': 5282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:19.572648', 'step': 5282, 'epoch': 3} {'type': 'loss', 'content': 0.02908501960337162, 'timestamp': '2025-09-10 02:31:19.574463', 'step': 5283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.603516', 'step': 5283, 'epoch': 3} {'type': 'loss', 'content': 0.0011206221533939242, 'timestamp': '2025-09-10 02:31:19.627157', 'step': 5284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.656075', 'step': 5284, 'epoch': 3} {'type': 'loss', 'content': 7.628348976140842e-05, 'timestamp': '2025-09-10 02:31:19.658185', 'step': 5285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.687559', 'step': 5285, 'epoch': 3} {'type': 'loss', 'content': 7.452488353010267e-05, 'timestamp': '2025-09-10 02:31:19.691340', 'step': 5286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:19.720388', 'step': 5286, 'epoch': 3} {'type': 'loss', 'content': 0.01618286408483982, 'timestamp': '2025-09-10 02:31:19.722314', 'step': 5287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.753329', 'step': 5287, 'epoch': 3} {'type': 'loss', 'content': 0.00011036908108508214, 'timestamp': '2025-09-10 02:31:19.776841', 'step': 5288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.811691', 'step': 5288, 'epoch': 3} {'type': 'loss', 'content': 0.005474313162267208, 'timestamp': '2025-09-10 02:31:19.813569', 'step': 5289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.842327', 'step': 5289, 'epoch': 3} {'type': 'loss', 'content': 0.0004412243433762342, 'timestamp': '2025-09-10 02:31:19.848291', 'step': 5290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.880027', 'step': 5290, 'epoch': 3} {'type': 'loss', 'content': 0.00018016780086327344, 'timestamp': '2025-09-10 02:31:19.882325', 'step': 5291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:19.912422', 'step': 5291, 'epoch': 3} {'type': 'loss', 'content': 0.02032560668885708, 'timestamp': '2025-09-10 02:31:19.936006', 'step': 5292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.966711', 'step': 5292, 'epoch': 3} {'type': 'loss', 'content': 0.00012549829261843115, 'timestamp': '2025-09-10 02:31:19.968835', 'step': 5293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:19.999016', 'step': 5293, 'epoch': 3} {'type': 'loss', 'content': 0.0002709024411160499, 'timestamp': '2025-09-10 02:31:20.001308', 'step': 5294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.030992', 'step': 5294, 'epoch': 3} {'type': 'loss', 'content': 0.033939070999622345, 'timestamp': '2025-09-10 02:31:20.033124', 'step': 5295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.062470', 'step': 5295, 'epoch': 3} {'type': 'loss', 'content': 0.04416469484567642, 'timestamp': '2025-09-10 02:31:20.086154', 'step': 5296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.115566', 'step': 5296, 'epoch': 3} {'type': 'loss', 'content': 0.00032265594927594066, 'timestamp': '2025-09-10 02:31:20.121141', 'step': 5297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.160260', 'step': 5297, 'epoch': 3} {'type': 'loss', 'content': 0.005029712338000536, 'timestamp': '2025-09-10 02:31:20.162311', 'step': 5298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.193516', 'step': 5298, 'epoch': 3} {'type': 'loss', 'content': 0.009289773181080818, 'timestamp': '2025-09-10 02:31:20.197798', 'step': 5299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.235325', 'step': 5299, 'epoch': 3} {'type': 'loss', 'content': 8.617424464318901e-05, 'timestamp': '2025-09-10 02:31:20.259118', 'step': 5300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.288455', 'step': 5300, 'epoch': 3} {'type': 'loss', 'content': 0.0001644386356929317, 'timestamp': '2025-09-10 02:31:20.290912', 'step': 5301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.322936', 'step': 5301, 'epoch': 3} {'type': 'loss', 'content': 8.05677191237919e-05, 'timestamp': '2025-09-10 02:31:20.325139', 'step': 5302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.355612', 'step': 5302, 'epoch': 3} {'type': 'loss', 'content': 0.05833953246474266, 'timestamp': '2025-09-10 02:31:20.367871', 'step': 5303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.401700', 'step': 5303, 'epoch': 3} {'type': 'loss', 'content': 0.0016823047772049904, 'timestamp': '2025-09-10 02:31:20.425730', 'step': 5304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.454809', 'step': 5304, 'epoch': 3} {'type': 'loss', 'content': 0.0006739491946063936, 'timestamp': '2025-09-10 02:31:20.456751', 'step': 5305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.487365', 'step': 5305, 'epoch': 3} {'type': 'loss', 'content': 0.001487448113039136, 'timestamp': '2025-09-10 02:31:20.490804', 'step': 5306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.519659', 'step': 5306, 'epoch': 3} {'type': 'loss', 'content': 0.006275292951613665, 'timestamp': '2025-09-10 02:31:20.521833', 'step': 5307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.551153', 'step': 5307, 'epoch': 3} {'type': 'loss', 'content': 0.00020399487402755767, 'timestamp': '2025-09-10 02:31:20.574851', 'step': 5308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.609004', 'step': 5308, 'epoch': 3} {'type': 'loss', 'content': 7.673249638173729e-05, 'timestamp': '2025-09-10 02:31:20.610806', 'step': 5309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.639468', 'step': 5309, 'epoch': 3} {'type': 'loss', 'content': 0.0006607953691855073, 'timestamp': '2025-09-10 02:31:20.641553', 'step': 5310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.670452', 'step': 5310, 'epoch': 3} {'type': 'loss', 'content': 0.000300820596748963, 'timestamp': '2025-09-10 02:31:20.672538', 'step': 5311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.701310', 'step': 5311, 'epoch': 3} {'type': 'loss', 'content': 0.00024385188589803874, 'timestamp': '2025-09-10 02:31:20.724949', 'step': 5312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.754340', 'step': 5312, 'epoch': 3} {'type': 'loss', 'content': 0.01962227001786232, 'timestamp': '2025-09-10 02:31:20.757731', 'step': 5313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.789592', 'step': 5313, 'epoch': 3} {'type': 'loss', 'content': 0.0009850615169852972, 'timestamp': '2025-09-10 02:31:20.791832', 'step': 5314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.823986', 'step': 5314, 'epoch': 3} {'type': 'loss', 'content': 0.0003341401170473546, 'timestamp': '2025-09-10 02:31:20.826172', 'step': 5315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:20.855012', 'step': 5315, 'epoch': 3} {'type': 'loss', 'content': 0.005033438093960285, 'timestamp': '2025-09-10 02:31:20.880170', 'step': 5316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.910178', 'step': 5316, 'epoch': 3} {'type': 'loss', 'content': 0.013828927651047707, 'timestamp': '2025-09-10 02:31:20.912251', 'step': 5317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:20.942083', 'step': 5317, 'epoch': 3} {'type': 'loss', 'content': 0.0004040444327984005, 'timestamp': '2025-09-10 02:31:20.944122', 'step': 5318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:20.976867', 'step': 5318, 'epoch': 3} {'type': 'loss', 'content': 0.00015366276784334332, 'timestamp': '2025-09-10 02:31:20.979054', 'step': 5319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:21.007707', 'step': 5319, 'epoch': 3} {'type': 'loss', 'content': 0.0336005724966526, 'timestamp': '2025-09-10 02:31:21.031112', 'step': 5320, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:31:23.033008', 'step': 5320, 'epoch': 3} {'type': 'pplx', 'content': 2551079.3306668666, 'timestamp': '2025-09-10 02:31:23.035009', 'step': 5320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:23.062383', 'step': 5320, 'epoch': 3} {'type': 'loss', 'content': 0.005831195507198572, 'timestamp': '2025-09-10 02:31:23.064508', 'step': 5321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.093977', 'step': 5321, 'epoch': 3} {'type': 'loss', 'content': 0.002231104066595435, 'timestamp': '2025-09-10 02:31:23.098600', 'step': 5322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.135118', 'step': 5322, 'epoch': 3} {'type': 'loss', 'content': 0.0038797142915427685, 'timestamp': '2025-09-10 02:31:23.137440', 'step': 5323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.166399', 'step': 5323, 'epoch': 3} {'type': 'loss', 'content': 0.009675068780779839, 'timestamp': '2025-09-10 02:31:23.190502', 'step': 5324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.220054', 'step': 5324, 'epoch': 3} {'type': 'loss', 'content': 0.017302149906754494, 'timestamp': '2025-09-10 02:31:23.225215', 'step': 5325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.256571', 'step': 5325, 'epoch': 3} {'type': 'loss', 'content': 0.0034467519726604223, 'timestamp': '2025-09-10 02:31:23.259000', 'step': 5326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:23.288559', 'step': 5326, 'epoch': 3} {'type': 'loss', 'content': 0.00029501141398213804, 'timestamp': '2025-09-10 02:31:23.290692', 'step': 5327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.319613', 'step': 5327, 'epoch': 3} {'type': 'loss', 'content': 0.05141686275601387, 'timestamp': '2025-09-10 02:31:23.343802', 'step': 5328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.378494', 'step': 5328, 'epoch': 3} {'type': 'loss', 'content': 0.0002961040590889752, 'timestamp': '2025-09-10 02:31:23.380572', 'step': 5329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.413316', 'step': 5329, 'epoch': 3} {'type': 'loss', 'content': 0.0018813759088516235, 'timestamp': '2025-09-10 02:31:23.415832', 'step': 5330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:23.444493', 'step': 5330, 'epoch': 3} {'type': 'loss', 'content': 0.0022770597133785486, 'timestamp': '2025-09-10 02:31:23.446223', 'step': 5331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.479983', 'step': 5331, 'epoch': 3} {'type': 'loss', 'content': 0.016446303576231003, 'timestamp': '2025-09-10 02:31:23.506221', 'step': 5332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.535570', 'step': 5332, 'epoch': 3} {'type': 'loss', 'content': 0.0007620741962455213, 'timestamp': '2025-09-10 02:31:23.540073', 'step': 5333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.570257', 'step': 5333, 'epoch': 3} {'type': 'loss', 'content': 0.0005776284378953278, 'timestamp': '2025-09-10 02:31:23.572491', 'step': 5334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.606212', 'step': 5334, 'epoch': 3} {'type': 'loss', 'content': 0.017588842660188675, 'timestamp': '2025-09-10 02:31:23.608382', 'step': 5335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.642529', 'step': 5335, 'epoch': 3} {'type': 'loss', 'content': 0.00035538533120416105, 'timestamp': '2025-09-10 02:31:23.666123', 'step': 5336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.704544', 'step': 5336, 'epoch': 3} {'type': 'loss', 'content': 0.019263219088315964, 'timestamp': '2025-09-10 02:31:23.706683', 'step': 5337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.745640', 'step': 5337, 'epoch': 3} {'type': 'loss', 'content': 0.04862800985574722, 'timestamp': '2025-09-10 02:31:23.748205', 'step': 5338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.781046', 'step': 5338, 'epoch': 3} {'type': 'loss', 'content': 0.002783591626212001, 'timestamp': '2025-09-10 02:31:23.783018', 'step': 5339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:23.812930', 'step': 5339, 'epoch': 3} {'type': 'loss', 'content': 0.0006132293492555618, 'timestamp': '2025-09-10 02:31:23.837116', 'step': 5340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.867392', 'step': 5340, 'epoch': 3} {'type': 'loss', 'content': 0.0003858158888760954, 'timestamp': '2025-09-10 02:31:23.869425', 'step': 5341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.898673', 'step': 5341, 'epoch': 3} {'type': 'loss', 'content': 0.0003180743078701198, 'timestamp': '2025-09-10 02:31:23.900788', 'step': 5342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.929420', 'step': 5342, 'epoch': 3} {'type': 'loss', 'content': 0.012362735345959663, 'timestamp': '2025-09-10 02:31:23.931464', 'step': 5343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:23.960404', 'step': 5343, 'epoch': 3} {'type': 'loss', 'content': 0.006400517653673887, 'timestamp': '2025-09-10 02:31:23.984040', 'step': 5344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.014733', 'step': 5344, 'epoch': 3} {'type': 'loss', 'content': 0.0027917451225221157, 'timestamp': '2025-09-10 02:31:24.017888', 'step': 5345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.048798', 'step': 5345, 'epoch': 3} {'type': 'loss', 'content': 0.013821698725223541, 'timestamp': '2025-09-10 02:31:24.050810', 'step': 5346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.084439', 'step': 5346, 'epoch': 3} {'type': 'loss', 'content': 0.002162518445402384, 'timestamp': '2025-09-10 02:31:24.086651', 'step': 5347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.116500', 'step': 5347, 'epoch': 3} {'type': 'loss', 'content': 0.00018192424613516778, 'timestamp': '2025-09-10 02:31:24.141334', 'step': 5348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.170338', 'step': 5348, 'epoch': 3} {'type': 'loss', 'content': 0.0048463004641234875, 'timestamp': '2025-09-10 02:31:24.173256', 'step': 5349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.203038', 'step': 5349, 'epoch': 3} {'type': 'loss', 'content': 0.003790907561779022, 'timestamp': '2025-09-10 02:31:24.224082', 'step': 5350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.254891', 'step': 5350, 'epoch': 3} {'type': 'loss', 'content': 0.00026786929811351, 'timestamp': '2025-09-10 02:31:24.258520', 'step': 5351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:24.287612', 'step': 5351, 'epoch': 3} {'type': 'loss', 'content': 0.028449947014451027, 'timestamp': '2025-09-10 02:31:24.312694', 'step': 5352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.359701', 'step': 5352, 'epoch': 3} {'type': 'loss', 'content': 0.006347959395498037, 'timestamp': '2025-09-10 02:31:24.361584', 'step': 5353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.395475', 'step': 5353, 'epoch': 3} {'type': 'loss', 'content': 0.010898602195084095, 'timestamp': '2025-09-10 02:31:24.397680', 'step': 5354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.426488', 'step': 5354, 'epoch': 3} {'type': 'loss', 'content': 0.0015070537338033319, 'timestamp': '2025-09-10 02:31:24.428418', 'step': 5355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.457631', 'step': 5355, 'epoch': 3} {'type': 'loss', 'content': 0.0019601122476160526, 'timestamp': '2025-09-10 02:31:24.481157', 'step': 5356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.510438', 'step': 5356, 'epoch': 3} {'type': 'loss', 'content': 0.002505767857655883, 'timestamp': '2025-09-10 02:31:24.515852', 'step': 5357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.544584', 'step': 5357, 'epoch': 3} {'type': 'loss', 'content': 0.04163316637277603, 'timestamp': '2025-09-10 02:31:24.546994', 'step': 5358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.578752', 'step': 5358, 'epoch': 3} {'type': 'loss', 'content': 0.011395329609513283, 'timestamp': '2025-09-10 02:31:24.580942', 'step': 5359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.614969', 'step': 5359, 'epoch': 3} {'type': 'loss', 'content': 0.0008985429303720593, 'timestamp': '2025-09-10 02:31:24.639625', 'step': 5360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.668878', 'step': 5360, 'epoch': 3} {'type': 'loss', 'content': 0.028808802366256714, 'timestamp': '2025-09-10 02:31:24.672278', 'step': 5361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.701338', 'step': 5361, 'epoch': 3} {'type': 'loss', 'content': 0.0006094464915804565, 'timestamp': '2025-09-10 02:31:24.703520', 'step': 5362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.738751', 'step': 5362, 'epoch': 3} {'type': 'loss', 'content': 0.006529428996145725, 'timestamp': '2025-09-10 02:31:24.740914', 'step': 5363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.785758', 'step': 5363, 'epoch': 3} {'type': 'loss', 'content': 0.004220775328576565, 'timestamp': '2025-09-10 02:31:24.809807', 'step': 5364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.839278', 'step': 5364, 'epoch': 3} {'type': 'loss', 'content': 0.04075964167714119, 'timestamp': '2025-09-10 02:31:24.842825', 'step': 5365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.876161', 'step': 5365, 'epoch': 3} {'type': 'loss', 'content': 0.009647761471569538, 'timestamp': '2025-09-10 02:31:24.882712', 'step': 5366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.912694', 'step': 5366, 'epoch': 3} {'type': 'loss', 'content': 0.0018993124831467867, 'timestamp': '2025-09-10 02:31:24.915139', 'step': 5367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:24.944155', 'step': 5367, 'epoch': 3} {'type': 'loss', 'content': 0.0010295561514794827, 'timestamp': '2025-09-10 02:31:24.967567', 'step': 5368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:24.998242', 'step': 5368, 'epoch': 3} {'type': 'loss', 'content': 0.0020003027748316526, 'timestamp': '2025-09-10 02:31:25.002331', 'step': 5369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.040875', 'step': 5369, 'epoch': 3} {'type': 'loss', 'content': 0.0005581422592513263, 'timestamp': '2025-09-10 02:31:25.044665', 'step': 5370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.074439', 'step': 5370, 'epoch': 3} {'type': 'loss', 'content': 0.0026471889577805996, 'timestamp': '2025-09-10 02:31:25.076301', 'step': 5371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.108638', 'step': 5371, 'epoch': 3} {'type': 'loss', 'content': 0.023278558626770973, 'timestamp': '2025-09-10 02:31:25.133488', 'step': 5372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.163686', 'step': 5372, 'epoch': 3} {'type': 'loss', 'content': 0.000599853228777647, 'timestamp': '2025-09-10 02:31:25.165653', 'step': 5373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.194542', 'step': 5373, 'epoch': 3} {'type': 'loss', 'content': 0.0011137856636196375, 'timestamp': '2025-09-10 02:31:25.197585', 'step': 5374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.228242', 'step': 5374, 'epoch': 3} {'type': 'loss', 'content': 0.014667009934782982, 'timestamp': '2025-09-10 02:31:25.230258', 'step': 5375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:25.261129', 'step': 5375, 'epoch': 3} {'type': 'loss', 'content': 0.008366279304027557, 'timestamp': '2025-09-10 02:31:25.288421', 'step': 5376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:25.317526', 'step': 5376, 'epoch': 3} {'type': 'loss', 'content': 0.0007964317337609828, 'timestamp': '2025-09-10 02:31:25.319657', 'step': 5377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.364801', 'step': 5377, 'epoch': 3} {'type': 'loss', 'content': 0.009769702330231667, 'timestamp': '2025-09-10 02:31:25.366897', 'step': 5378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.398327', 'step': 5378, 'epoch': 3} {'type': 'loss', 'content': 0.0003478755825199187, 'timestamp': '2025-09-10 02:31:25.401384', 'step': 5379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.436273', 'step': 5379, 'epoch': 3} {'type': 'loss', 'content': 0.005425657145678997, 'timestamp': '2025-09-10 02:31:25.463427', 'step': 5380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.498521', 'step': 5380, 'epoch': 3} {'type': 'loss', 'content': 0.006446263287216425, 'timestamp': '2025-09-10 02:31:25.505037', 'step': 5381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:25.537189', 'step': 5381, 'epoch': 3} {'type': 'loss', 'content': 0.0004652982752304524, 'timestamp': '2025-09-10 02:31:25.539203', 'step': 5382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:25.569171', 'step': 5382, 'epoch': 3} {'type': 'loss', 'content': 0.0002952113572973758, 'timestamp': '2025-09-10 02:31:25.570955', 'step': 5383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:25.600426', 'step': 5383, 'epoch': 3} {'type': 'loss', 'content': 0.0017549424665048718, 'timestamp': '2025-09-10 02:31:25.625726', 'step': 5384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.655258', 'step': 5384, 'epoch': 3} {'type': 'loss', 'content': 0.003002666402608156, 'timestamp': '2025-09-10 02:31:25.657887', 'step': 5385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:25.691141', 'step': 5385, 'epoch': 3} {'type': 'loss', 'content': 0.010298574343323708, 'timestamp': '2025-09-10 02:31:25.693059', 'step': 5386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.724195', 'step': 5386, 'epoch': 3} {'type': 'loss', 'content': 0.00422916142269969, 'timestamp': '2025-09-10 02:31:25.726089', 'step': 5387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.759233', 'step': 5387, 'epoch': 3} {'type': 'loss', 'content': 0.0069030639715492725, 'timestamp': '2025-09-10 02:31:25.782721', 'step': 5388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.811771', 'step': 5388, 'epoch': 3} {'type': 'loss', 'content': 0.0004885837552137673, 'timestamp': '2025-09-10 02:31:25.813703', 'step': 5389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.845112', 'step': 5389, 'epoch': 3} {'type': 'loss', 'content': 0.011209973134100437, 'timestamp': '2025-09-10 02:31:25.847313', 'step': 5390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.883635', 'step': 5390, 'epoch': 3} {'type': 'loss', 'content': 0.00047788023948669434, 'timestamp': '2025-09-10 02:31:25.886748', 'step': 5391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:25.922538', 'step': 5391, 'epoch': 3} {'type': 'loss', 'content': 0.0013479841873049736, 'timestamp': '2025-09-10 02:31:25.946145', 'step': 5392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:25.976653', 'step': 5392, 'epoch': 3} {'type': 'loss', 'content': 0.0030322824604809284, 'timestamp': '2025-09-10 02:31:25.979194', 'step': 5393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.010820', 'step': 5393, 'epoch': 3} {'type': 'loss', 'content': 0.0008384265820495784, 'timestamp': '2025-09-10 02:31:26.013324', 'step': 5394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.043891', 'step': 5394, 'epoch': 3} {'type': 'loss', 'content': 0.0038477268535643816, 'timestamp': '2025-09-10 02:31:26.045876', 'step': 5395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.075628', 'step': 5395, 'epoch': 3} {'type': 'loss', 'content': 0.005468371324241161, 'timestamp': '2025-09-10 02:31:26.100339', 'step': 5396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.128968', 'step': 5396, 'epoch': 3} {'type': 'loss', 'content': 0.0058067962527275085, 'timestamp': '2025-09-10 02:31:26.131334', 'step': 5397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.160638', 'step': 5397, 'epoch': 3} {'type': 'loss', 'content': 0.009803896769881248, 'timestamp': '2025-09-10 02:31:26.162433', 'step': 5398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.191702', 'step': 5398, 'epoch': 3} {'type': 'loss', 'content': 0.0013640226097777486, 'timestamp': '2025-09-10 02:31:26.198297', 'step': 5399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.232493', 'step': 5399, 'epoch': 3} {'type': 'loss', 'content': 0.00018000802083406597, 'timestamp': '2025-09-10 02:31:26.255848', 'step': 5400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.288083', 'step': 5400, 'epoch': 3} {'type': 'loss', 'content': 0.01599857583642006, 'timestamp': '2025-09-10 02:31:26.290038', 'step': 5401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:26.318620', 'step': 5401, 'epoch': 3} {'type': 'loss', 'content': 0.013749159872531891, 'timestamp': '2025-09-10 02:31:26.320767', 'step': 5402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:26.351926', 'step': 5402, 'epoch': 3} {'type': 'loss', 'content': 0.0023441340308636427, 'timestamp': '2025-09-10 02:31:26.355992', 'step': 5403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.385630', 'step': 5403, 'epoch': 3} {'type': 'loss', 'content': 0.001614662236534059, 'timestamp': '2025-09-10 02:31:26.411454', 'step': 5404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:26.440668', 'step': 5404, 'epoch': 3} {'type': 'loss', 'content': 0.001221572863869369, 'timestamp': '2025-09-10 02:31:26.443952', 'step': 5405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.473381', 'step': 5405, 'epoch': 3} {'type': 'loss', 'content': 0.0012589620891958475, 'timestamp': '2025-09-10 02:31:26.475347', 'step': 5406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.504461', 'step': 5406, 'epoch': 3} {'type': 'loss', 'content': 0.005226811859756708, 'timestamp': '2025-09-10 02:31:26.508840', 'step': 5407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.538014', 'step': 5407, 'epoch': 3} {'type': 'loss', 'content': 0.0004326138296164572, 'timestamp': '2025-09-10 02:31:26.561660', 'step': 5408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.597166', 'step': 5408, 'epoch': 3} {'type': 'loss', 'content': 0.0002521525020711124, 'timestamp': '2025-09-10 02:31:26.599529', 'step': 5409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.629630', 'step': 5409, 'epoch': 3} {'type': 'loss', 'content': 0.0007950930739752948, 'timestamp': '2025-09-10 02:31:26.632036', 'step': 5410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.661393', 'step': 5410, 'epoch': 3} {'type': 'loss', 'content': 0.00030989127117209136, 'timestamp': '2025-09-10 02:31:26.663523', 'step': 5411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.692967', 'step': 5411, 'epoch': 3} {'type': 'loss', 'content': 0.0004943537642247975, 'timestamp': '2025-09-10 02:31:26.717318', 'step': 5412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.747413', 'step': 5412, 'epoch': 3} {'type': 'loss', 'content': 0.0009193782461807132, 'timestamp': '2025-09-10 02:31:26.753106', 'step': 5413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:26.783751', 'step': 5413, 'epoch': 3} {'type': 'loss', 'content': 0.03788406029343605, 'timestamp': '2025-09-10 02:31:26.785876', 'step': 5414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.823038', 'step': 5414, 'epoch': 3} {'type': 'loss', 'content': 0.0005143205635249615, 'timestamp': '2025-09-10 02:31:26.826943', 'step': 5415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.857698', 'step': 5415, 'epoch': 3} {'type': 'loss', 'content': 0.00025871643447317183, 'timestamp': '2025-09-10 02:31:26.881250', 'step': 5416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.911551', 'step': 5416, 'epoch': 3} {'type': 'loss', 'content': 0.003082282841205597, 'timestamp': '2025-09-10 02:31:26.913811', 'step': 5417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.950935', 'step': 5417, 'epoch': 3} {'type': 'loss', 'content': 0.0006056904676370323, 'timestamp': '2025-09-10 02:31:26.954099', 'step': 5418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:26.982838', 'step': 5418, 'epoch': 3} {'type': 'loss', 'content': 0.001431368524208665, 'timestamp': '2025-09-10 02:31:26.985716', 'step': 5419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.024474', 'step': 5419, 'epoch': 3} {'type': 'loss', 'content': 0.0009165522642433643, 'timestamp': '2025-09-10 02:31:27.054829', 'step': 5420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:27.085200', 'step': 5420, 'epoch': 3} {'type': 'loss', 'content': 0.002991889836266637, 'timestamp': '2025-09-10 02:31:27.087406', 'step': 5421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.116902', 'step': 5421, 'epoch': 3} {'type': 'loss', 'content': 0.0011327136307954788, 'timestamp': '2025-09-10 02:31:27.118860', 'step': 5422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.150621', 'step': 5422, 'epoch': 3} {'type': 'loss', 'content': 0.00021207370446063578, 'timestamp': '2025-09-10 02:31:27.158499', 'step': 5423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.191042', 'step': 5423, 'epoch': 3} {'type': 'loss', 'content': 0.0027875422965735197, 'timestamp': '2025-09-10 02:31:27.216448', 'step': 5424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.246280', 'step': 5424, 'epoch': 3} {'type': 'loss', 'content': 0.0001328000653302297, 'timestamp': '2025-09-10 02:31:27.248384', 'step': 5425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.278931', 'step': 5425, 'epoch': 3} {'type': 'loss', 'content': 0.0025467739906162024, 'timestamp': '2025-09-10 02:31:27.281134', 'step': 5426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.311445', 'step': 5426, 'epoch': 3} {'type': 'loss', 'content': 0.0028369533829391003, 'timestamp': '2025-09-10 02:31:27.313875', 'step': 5427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.342705', 'step': 5427, 'epoch': 3} {'type': 'loss', 'content': 0.010322848334908485, 'timestamp': '2025-09-10 02:31:27.370233', 'step': 5428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.399460', 'step': 5428, 'epoch': 3} {'type': 'loss', 'content': 0.0004985965206287801, 'timestamp': '2025-09-10 02:31:27.401479', 'step': 5429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.442792', 'step': 5429, 'epoch': 3} {'type': 'loss', 'content': 0.0024056960828602314, 'timestamp': '2025-09-10 02:31:27.444562', 'step': 5430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:27.473692', 'step': 5430, 'epoch': 3} {'type': 'loss', 'content': 0.0010101028019562364, 'timestamp': '2025-09-10 02:31:27.475638', 'step': 5431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:27.504217', 'step': 5431, 'epoch': 3} {'type': 'loss', 'content': 0.0004391186812426895, 'timestamp': '2025-09-10 02:31:27.531582', 'step': 5432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.560409', 'step': 5432, 'epoch': 3} {'type': 'loss', 'content': 0.0015307767316699028, 'timestamp': '2025-09-10 02:31:27.562282', 'step': 5433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.593316', 'step': 5433, 'epoch': 3} {'type': 'loss', 'content': 0.009664238430559635, 'timestamp': '2025-09-10 02:31:27.596673', 'step': 5434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:27.629062', 'step': 5434, 'epoch': 3} {'type': 'loss', 'content': 0.045008838176727295, 'timestamp': '2025-09-10 02:31:27.631117', 'step': 5435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.663540', 'step': 5435, 'epoch': 3} {'type': 'loss', 'content': 0.0011996644316241145, 'timestamp': '2025-09-10 02:31:27.687543', 'step': 5436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:27.716411', 'step': 5436, 'epoch': 3} {'type': 'loss', 'content': 0.0003019919095095247, 'timestamp': '2025-09-10 02:31:27.718376', 'step': 5437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:27.749022', 'step': 5437, 'epoch': 3} {'type': 'loss', 'content': 0.004851988051086664, 'timestamp': '2025-09-10 02:31:27.751149', 'step': 5438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.780038', 'step': 5438, 'epoch': 3} {'type': 'loss', 'content': 0.00012647596304304898, 'timestamp': '2025-09-10 02:31:27.782089', 'step': 5439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.810878', 'step': 5439, 'epoch': 3} {'type': 'loss', 'content': 9.743525879457593e-05, 'timestamp': '2025-09-10 02:31:27.834312', 'step': 5440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:27.865423', 'step': 5440, 'epoch': 3} {'type': 'loss', 'content': 0.0018475401448085904, 'timestamp': '2025-09-10 02:31:27.867279', 'step': 5441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.895647', 'step': 5441, 'epoch': 3} {'type': 'loss', 'content': 0.06767947226762772, 'timestamp': '2025-09-10 02:31:27.899172', 'step': 5442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.928996', 'step': 5442, 'epoch': 3} {'type': 'loss', 'content': 0.003510425565764308, 'timestamp': '2025-09-10 02:31:27.933140', 'step': 5443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:27.962555', 'step': 5443, 'epoch': 3} {'type': 'loss', 'content': 0.0025570436846464872, 'timestamp': '2025-09-10 02:31:27.986290', 'step': 5444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.015107', 'step': 5444, 'epoch': 3} {'type': 'loss', 'content': 0.00011262608313700184, 'timestamp': '2025-09-10 02:31:28.017193', 'step': 5445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.046305', 'step': 5445, 'epoch': 3} {'type': 'loss', 'content': 0.00039159305742941797, 'timestamp': '2025-09-10 02:31:28.048214', 'step': 5446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.076983', 'step': 5446, 'epoch': 3} {'type': 'loss', 'content': 0.0006352835334837437, 'timestamp': '2025-09-10 02:31:28.079035', 'step': 5447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.107689', 'step': 5447, 'epoch': 3} {'type': 'loss', 'content': 0.002199439564719796, 'timestamp': '2025-09-10 02:31:28.133621', 'step': 5448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.162487', 'step': 5448, 'epoch': 3} {'type': 'loss', 'content': 0.013469723053276539, 'timestamp': '2025-09-10 02:31:28.164474', 'step': 5449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.193270', 'step': 5449, 'epoch': 3} {'type': 'loss', 'content': 0.017483588308095932, 'timestamp': '2025-09-10 02:31:28.195148', 'step': 5450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.224483', 'step': 5450, 'epoch': 3} {'type': 'loss', 'content': 0.0002191327657783404, 'timestamp': '2025-09-10 02:31:28.226292', 'step': 5451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.255419', 'step': 5451, 'epoch': 3} {'type': 'loss', 'content': 0.0001765905908541754, 'timestamp': '2025-09-10 02:31:28.278771', 'step': 5452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.307886', 'step': 5452, 'epoch': 3} {'type': 'loss', 'content': 0.00013448033132590353, 'timestamp': '2025-09-10 02:31:28.310131', 'step': 5453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.339054', 'step': 5453, 'epoch': 3} {'type': 'loss', 'content': 0.005181503016501665, 'timestamp': '2025-09-10 02:31:28.341227', 'step': 5454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.370507', 'step': 5454, 'epoch': 3} {'type': 'loss', 'content': 0.00517942663282156, 'timestamp': '2025-09-10 02:31:28.373346', 'step': 5455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.402330', 'step': 5455, 'epoch': 3} {'type': 'loss', 'content': 0.006793874315917492, 'timestamp': '2025-09-10 02:31:28.425930', 'step': 5456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:28.454436', 'step': 5456, 'epoch': 3} {'type': 'loss', 'content': 0.00022978255583439022, 'timestamp': '2025-09-10 02:31:28.456438', 'step': 5457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.485551', 'step': 5457, 'epoch': 3} {'type': 'loss', 'content': 0.001192291034385562, 'timestamp': '2025-09-10 02:31:28.488072', 'step': 5458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.517368', 'step': 5458, 'epoch': 3} {'type': 'loss', 'content': 0.005229536443948746, 'timestamp': '2025-09-10 02:31:28.519342', 'step': 5459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.549096', 'step': 5459, 'epoch': 3} {'type': 'loss', 'content': 0.001903381198644638, 'timestamp': '2025-09-10 02:31:28.572989', 'step': 5460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:28.602682', 'step': 5460, 'epoch': 3} {'type': 'loss', 'content': 0.001068693003617227, 'timestamp': '2025-09-10 02:31:28.604858', 'step': 5461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.634936', 'step': 5461, 'epoch': 3} {'type': 'loss', 'content': 0.002789911115542054, 'timestamp': '2025-09-10 02:31:28.636955', 'step': 5462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.666027', 'step': 5462, 'epoch': 3} {'type': 'loss', 'content': 0.0007820177124813199, 'timestamp': '2025-09-10 02:31:28.668117', 'step': 5463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.696562', 'step': 5463, 'epoch': 3} {'type': 'loss', 'content': 0.0049674129113554955, 'timestamp': '2025-09-10 02:31:28.720063', 'step': 5464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.749006', 'step': 5464, 'epoch': 3} {'type': 'loss', 'content': 0.00047011018614284694, 'timestamp': '2025-09-10 02:31:28.751058', 'step': 5465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.780409', 'step': 5465, 'epoch': 3} {'type': 'loss', 'content': 0.013229615055024624, 'timestamp': '2025-09-10 02:31:28.782356', 'step': 5466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.811414', 'step': 5466, 'epoch': 3} {'type': 'loss', 'content': 0.0001901341456687078, 'timestamp': '2025-09-10 02:31:28.813414', 'step': 5467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.842356', 'step': 5467, 'epoch': 3} {'type': 'loss', 'content': 0.00024926214246079326, 'timestamp': '2025-09-10 02:31:28.866168', 'step': 5468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.895631', 'step': 5468, 'epoch': 3} {'type': 'loss', 'content': 0.009387032128870487, 'timestamp': '2025-09-10 02:31:28.897605', 'step': 5469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:28.926679', 'step': 5469, 'epoch': 3} {'type': 'loss', 'content': 0.0005769426352344453, 'timestamp': '2025-09-10 02:31:28.928665', 'step': 5470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.957528', 'step': 5470, 'epoch': 3} {'type': 'loss', 'content': 0.0049220966175198555, 'timestamp': '2025-09-10 02:31:28.959662', 'step': 5471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:28.988579', 'step': 5471, 'epoch': 3} {'type': 'loss', 'content': 0.0017110798507928848, 'timestamp': '2025-09-10 02:31:29.012069', 'step': 5472, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:31:31.144539', 'step': 5472, 'epoch': 3} {'type': 'pplx', 'content': 2678997.8009577156, 'timestamp': '2025-09-10 02:31:31.146492', 'step': 5472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.175273', 'step': 5472, 'epoch': 3} {'type': 'loss', 'content': 0.04426982253789902, 'timestamp': '2025-09-10 02:31:31.177258', 'step': 5473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.206905', 'step': 5473, 'epoch': 3} {'type': 'loss', 'content': 0.023119186982512474, 'timestamp': '2025-09-10 02:31:31.208802', 'step': 5474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.237861', 'step': 5474, 'epoch': 3} {'type': 'loss', 'content': 0.00042799237417057157, 'timestamp': '2025-09-10 02:31:31.240169', 'step': 5475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.269421', 'step': 5475, 'epoch': 3} {'type': 'loss', 'content': 9.680674702394754e-05, 'timestamp': '2025-09-10 02:31:31.293075', 'step': 5476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.321893', 'step': 5476, 'epoch': 3} {'type': 'loss', 'content': 0.0021623498760163784, 'timestamp': '2025-09-10 02:31:31.323995', 'step': 5477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.352694', 'step': 5477, 'epoch': 3} {'type': 'loss', 'content': 0.0004191426560282707, 'timestamp': '2025-09-10 02:31:31.354790', 'step': 5478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.383501', 'step': 5478, 'epoch': 3} {'type': 'loss', 'content': 4.297324994695373e-05, 'timestamp': '2025-09-10 02:31:31.385571', 'step': 5479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.414170', 'step': 5479, 'epoch': 3} {'type': 'loss', 'content': 0.046055715531110764, 'timestamp': '2025-09-10 02:31:31.437751', 'step': 5480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.466686', 'step': 5480, 'epoch': 3} {'type': 'loss', 'content': 0.0018844603328034282, 'timestamp': '2025-09-10 02:31:31.468842', 'step': 5481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.497575', 'step': 5481, 'epoch': 3} {'type': 'loss', 'content': 0.0012828157050535083, 'timestamp': '2025-09-10 02:31:31.499538', 'step': 5482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.528295', 'step': 5482, 'epoch': 3} {'type': 'loss', 'content': 0.00011052561603719369, 'timestamp': '2025-09-10 02:31:31.530201', 'step': 5483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.558870', 'step': 5483, 'epoch': 3} {'type': 'loss', 'content': 0.0008187596104107797, 'timestamp': '2025-09-10 02:31:31.582458', 'step': 5484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.611416', 'step': 5484, 'epoch': 3} {'type': 'loss', 'content': 0.00011922647536266595, 'timestamp': '2025-09-10 02:31:31.613298', 'step': 5485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.642297', 'step': 5485, 'epoch': 3} {'type': 'loss', 'content': 0.008758625946938992, 'timestamp': '2025-09-10 02:31:31.644233', 'step': 5486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.673146', 'step': 5486, 'epoch': 3} {'type': 'loss', 'content': 0.00020699352899100631, 'timestamp': '2025-09-10 02:31:31.675153', 'step': 5487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.703751', 'step': 5487, 'epoch': 3} {'type': 'loss', 'content': 0.0006481898599304259, 'timestamp': '2025-09-10 02:31:31.727219', 'step': 5488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.755910', 'step': 5488, 'epoch': 3} {'type': 'loss', 'content': 0.00026211151271127164, 'timestamp': '2025-09-10 02:31:31.757785', 'step': 5489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.786360', 'step': 5489, 'epoch': 3} {'type': 'loss', 'content': 0.005063667893409729, 'timestamp': '2025-09-10 02:31:31.788379', 'step': 5490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.817161', 'step': 5490, 'epoch': 3} {'type': 'loss', 'content': 0.06866239011287689, 'timestamp': '2025-09-10 02:31:31.819107', 'step': 5491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.847617', 'step': 5491, 'epoch': 3} {'type': 'loss', 'content': 0.005610947962850332, 'timestamp': '2025-09-10 02:31:31.871282', 'step': 5492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:31.900024', 'step': 5492, 'epoch': 3} {'type': 'loss', 'content': 9.809240873437375e-05, 'timestamp': '2025-09-10 02:31:31.902020', 'step': 5493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.930407', 'step': 5493, 'epoch': 3} {'type': 'loss', 'content': 0.0005431465106084943, 'timestamp': '2025-09-10 02:31:31.932359', 'step': 5494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:31.961192', 'step': 5494, 'epoch': 3} {'type': 'loss', 'content': 0.049388524144887924, 'timestamp': '2025-09-10 02:31:31.963537', 'step': 5495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:31.992105', 'step': 5495, 'epoch': 3} {'type': 'loss', 'content': 0.04355853796005249, 'timestamp': '2025-09-10 02:31:32.017022', 'step': 5496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.046096', 'step': 5496, 'epoch': 3} {'type': 'loss', 'content': 0.00019289494957774878, 'timestamp': '2025-09-10 02:31:32.048115', 'step': 5497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.076691', 'step': 5497, 'epoch': 3} {'type': 'loss', 'content': 0.00040106798405759037, 'timestamp': '2025-09-10 02:31:32.078616', 'step': 5498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.107028', 'step': 5498, 'epoch': 3} {'type': 'loss', 'content': 0.0005221262690611184, 'timestamp': '2025-09-10 02:31:32.108971', 'step': 5499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:32.137655', 'step': 5499, 'epoch': 3} {'type': 'loss', 'content': 0.00019806763157248497, 'timestamp': '2025-09-10 02:31:32.161120', 'step': 5500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-10 02:31:36.469939', 'step': 5500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.500694', 'step': 5500, 'epoch': 3} {'type': 'loss', 'content': 0.00017940241377800703, 'timestamp': '2025-09-10 02:31:36.502773', 'step': 5501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.532246', 'step': 5501, 'epoch': 3} {'type': 'loss', 'content': 0.0007619414827786386, 'timestamp': '2025-09-10 02:31:36.534214', 'step': 5502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.563383', 'step': 5502, 'epoch': 3} {'type': 'loss', 'content': 0.0015142976772040129, 'timestamp': '2025-09-10 02:31:36.565867', 'step': 5503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.594870', 'step': 5503, 'epoch': 3} {'type': 'loss', 'content': 0.001050888909958303, 'timestamp': '2025-09-10 02:31:36.618879', 'step': 5504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.648030', 'step': 5504, 'epoch': 3} {'type': 'loss', 'content': 0.0003561497724149376, 'timestamp': '2025-09-10 02:31:36.650493', 'step': 5505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.680271', 'step': 5505, 'epoch': 3} {'type': 'loss', 'content': 0.0011121274437755346, 'timestamp': '2025-09-10 02:31:36.682192', 'step': 5506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.711019', 'step': 5506, 'epoch': 3} {'type': 'loss', 'content': 0.00017145891615655273, 'timestamp': '2025-09-10 02:31:36.713232', 'step': 5507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.742917', 'step': 5507, 'epoch': 3} {'type': 'loss', 'content': 0.011950437910854816, 'timestamp': '2025-09-10 02:31:36.766327', 'step': 5508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.794960', 'step': 5508, 'epoch': 3} {'type': 'loss', 'content': 8.530188642907888e-05, 'timestamp': '2025-09-10 02:31:36.796867', 'step': 5509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.825483', 'step': 5509, 'epoch': 3} {'type': 'loss', 'content': 0.02681620605289936, 'timestamp': '2025-09-10 02:31:36.827783', 'step': 5510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:36.857136', 'step': 5510, 'epoch': 3} {'type': 'loss', 'content': 0.0032190450001507998, 'timestamp': '2025-09-10 02:31:36.859190', 'step': 5511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.888137', 'step': 5511, 'epoch': 3} {'type': 'loss', 'content': 0.0009218252380378544, 'timestamp': '2025-09-10 02:31:36.911653', 'step': 5512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.944105', 'step': 5512, 'epoch': 3} {'type': 'loss', 'content': 0.0005729981930926442, 'timestamp': '2025-09-10 02:31:36.945948', 'step': 5513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:36.974995', 'step': 5513, 'epoch': 3} {'type': 'loss', 'content': 0.014555713161826134, 'timestamp': '2025-09-10 02:31:36.977012', 'step': 5514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.005690', 'step': 5514, 'epoch': 3} {'type': 'loss', 'content': 0.027327081188559532, 'timestamp': '2025-09-10 02:31:37.007934', 'step': 5515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.036579', 'step': 5515, 'epoch': 3} {'type': 'loss', 'content': 0.0005217579309828579, 'timestamp': '2025-09-10 02:31:37.060221', 'step': 5516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.089693', 'step': 5516, 'epoch': 3} {'type': 'loss', 'content': 0.007404877804219723, 'timestamp': '2025-09-10 02:31:37.091674', 'step': 5517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:37.120560', 'step': 5517, 'epoch': 3} {'type': 'loss', 'content': 0.0016886789817363024, 'timestamp': '2025-09-10 02:31:37.123877', 'step': 5518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.153073', 'step': 5518, 'epoch': 3} {'type': 'loss', 'content': 0.0016022982308641076, 'timestamp': '2025-09-10 02:31:37.155176', 'step': 5519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.183978', 'step': 5519, 'epoch': 3} {'type': 'loss', 'content': 0.0008979992708191276, 'timestamp': '2025-09-10 02:31:37.207615', 'step': 5520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:37.236546', 'step': 5520, 'epoch': 3} {'type': 'loss', 'content': 0.0015828582691028714, 'timestamp': '2025-09-10 02:31:37.238416', 'step': 5521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.267180', 'step': 5521, 'epoch': 3} {'type': 'loss', 'content': 0.000788357516285032, 'timestamp': '2025-09-10 02:31:37.269264', 'step': 5522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.298092', 'step': 5522, 'epoch': 3} {'type': 'loss', 'content': 0.0002860610547941178, 'timestamp': '2025-09-10 02:31:37.300013', 'step': 5523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:37.328622', 'step': 5523, 'epoch': 3} {'type': 'loss', 'content': 0.0002962402650155127, 'timestamp': '2025-09-10 02:31:37.352070', 'step': 5524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.381132', 'step': 5524, 'epoch': 3} {'type': 'loss', 'content': 0.0002550986537244171, 'timestamp': '2025-09-10 02:31:37.383123', 'step': 5525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.411856', 'step': 5525, 'epoch': 3} {'type': 'loss', 'content': 0.0012577223824337125, 'timestamp': '2025-09-10 02:31:37.413694', 'step': 5526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.442658', 'step': 5526, 'epoch': 3} {'type': 'loss', 'content': 0.0015119662275537848, 'timestamp': '2025-09-10 02:31:37.444540', 'step': 5527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.473192', 'step': 5527, 'epoch': 3} {'type': 'loss', 'content': 0.0013533987803384662, 'timestamp': '2025-09-10 02:31:37.496516', 'step': 5528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.525880', 'step': 5528, 'epoch': 3} {'type': 'loss', 'content': 0.007477410137653351, 'timestamp': '2025-09-10 02:31:37.527877', 'step': 5529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:37.556486', 'step': 5529, 'epoch': 3} {'type': 'loss', 'content': 0.000659845769405365, 'timestamp': '2025-09-10 02:31:37.558440', 'step': 5530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.587192', 'step': 5530, 'epoch': 3} {'type': 'loss', 'content': 0.00010641255357768387, 'timestamp': '2025-09-10 02:31:37.589061', 'step': 5531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.617827', 'step': 5531, 'epoch': 3} {'type': 'loss', 'content': 0.00041397142922505736, 'timestamp': '2025-09-10 02:31:37.641401', 'step': 5532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.670874', 'step': 5532, 'epoch': 3} {'type': 'loss', 'content': 0.0022644626442342997, 'timestamp': '2025-09-10 02:31:37.673112', 'step': 5533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.704029', 'step': 5533, 'epoch': 3} {'type': 'loss', 'content': 0.00990013126283884, 'timestamp': '2025-09-10 02:31:37.706077', 'step': 5534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.734718', 'step': 5534, 'epoch': 3} {'type': 'loss', 'content': 0.0009291375754401088, 'timestamp': '2025-09-10 02:31:37.737044', 'step': 5535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.766338', 'step': 5535, 'epoch': 3} {'type': 'loss', 'content': 0.0009419794077984989, 'timestamp': '2025-09-10 02:31:37.789976', 'step': 5536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.818975', 'step': 5536, 'epoch': 3} {'type': 'loss', 'content': 0.00018296926282346249, 'timestamp': '2025-09-10 02:31:37.820826', 'step': 5537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:37.849359', 'step': 5537, 'epoch': 3} {'type': 'loss', 'content': 0.0002313188451807946, 'timestamp': '2025-09-10 02:31:37.851327', 'step': 5538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.880440', 'step': 5538, 'epoch': 3} {'type': 'loss', 'content': 0.009750072844326496, 'timestamp': '2025-09-10 02:31:37.882429', 'step': 5539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:37.911477', 'step': 5539, 'epoch': 3} {'type': 'loss', 'content': 0.001343383570201695, 'timestamp': '2025-09-10 02:31:37.935027', 'step': 5540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.964208', 'step': 5540, 'epoch': 3} {'type': 'loss', 'content': 0.0014413215685635805, 'timestamp': '2025-09-10 02:31:37.966078', 'step': 5541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:37.994803', 'step': 5541, 'epoch': 3} {'type': 'loss', 'content': 0.00017327792011201382, 'timestamp': '2025-09-10 02:31:37.996703', 'step': 5542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.025334', 'step': 5542, 'epoch': 3} {'type': 'loss', 'content': 0.000250210432568565, 'timestamp': '2025-09-10 02:31:38.027225', 'step': 5543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.055921', 'step': 5543, 'epoch': 3} {'type': 'loss', 'content': 0.00019978173077106476, 'timestamp': '2025-09-10 02:31:38.079378', 'step': 5544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.107987', 'step': 5544, 'epoch': 3} {'type': 'loss', 'content': 0.0015174195868894458, 'timestamp': '2025-09-10 02:31:38.109981', 'step': 5545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.138532', 'step': 5545, 'epoch': 3} {'type': 'loss', 'content': 0.003827376291155815, 'timestamp': '2025-09-10 02:31:38.140393', 'step': 5546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.169730', 'step': 5546, 'epoch': 3} {'type': 'loss', 'content': 0.00042572617530822754, 'timestamp': '2025-09-10 02:31:38.171836', 'step': 5547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:38.202721', 'step': 5547, 'epoch': 3} {'type': 'loss', 'content': 0.023560727015137672, 'timestamp': '2025-09-10 02:31:38.226345', 'step': 5548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.256053', 'step': 5548, 'epoch': 3} {'type': 'loss', 'content': 0.0008844585972838104, 'timestamp': '2025-09-10 02:31:38.258014', 'step': 5549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.287223', 'step': 5549, 'epoch': 3} {'type': 'loss', 'content': 0.0008046877337619662, 'timestamp': '2025-09-10 02:31:38.289098', 'step': 5550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.318237', 'step': 5550, 'epoch': 3} {'type': 'loss', 'content': 0.004426827188581228, 'timestamp': '2025-09-10 02:31:38.320254', 'step': 5551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.349404', 'step': 5551, 'epoch': 3} {'type': 'loss', 'content': 0.0012104782508686185, 'timestamp': '2025-09-10 02:31:38.372979', 'step': 5552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.402790', 'step': 5552, 'epoch': 3} {'type': 'loss', 'content': 0.00035136216320097446, 'timestamp': '2025-09-10 02:31:38.404678', 'step': 5553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.433403', 'step': 5553, 'epoch': 3} {'type': 'loss', 'content': 0.0009261313825845718, 'timestamp': '2025-09-10 02:31:38.435411', 'step': 5554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.463835', 'step': 5554, 'epoch': 3} {'type': 'loss', 'content': 0.0003515938005875796, 'timestamp': '2025-09-10 02:31:38.465680', 'step': 5555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.494063', 'step': 5555, 'epoch': 3} {'type': 'loss', 'content': 8.098541002254933e-05, 'timestamp': '2025-09-10 02:31:38.517458', 'step': 5556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.546566', 'step': 5556, 'epoch': 3} {'type': 'loss', 'content': 0.004003607667982578, 'timestamp': '2025-09-10 02:31:38.548586', 'step': 5557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.577592', 'step': 5557, 'epoch': 3} {'type': 'loss', 'content': 0.0034106073435395956, 'timestamp': '2025-09-10 02:31:38.579418', 'step': 5558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.607492', 'step': 5558, 'epoch': 3} {'type': 'loss', 'content': 0.00020386728283483535, 'timestamp': '2025-09-10 02:31:38.609556', 'step': 5559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.639061', 'step': 5559, 'epoch': 3} {'type': 'loss', 'content': 0.0012458580313250422, 'timestamp': '2025-09-10 02:31:38.662476', 'step': 5560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.691187', 'step': 5560, 'epoch': 3} {'type': 'loss', 'content': 0.0059364731423556805, 'timestamp': '2025-09-10 02:31:38.693245', 'step': 5561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.722834', 'step': 5561, 'epoch': 3} {'type': 'loss', 'content': 0.0035293761175125837, 'timestamp': '2025-09-10 02:31:38.725144', 'step': 5562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.754230', 'step': 5562, 'epoch': 3} {'type': 'loss', 'content': 0.0002680100442375988, 'timestamp': '2025-09-10 02:31:38.756216', 'step': 5563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.785083', 'step': 5563, 'epoch': 3} {'type': 'loss', 'content': 0.002597348066046834, 'timestamp': '2025-09-10 02:31:38.808524', 'step': 5564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.837864', 'step': 5564, 'epoch': 3} {'type': 'loss', 'content': 0.00014817291230428964, 'timestamp': '2025-09-10 02:31:38.839884', 'step': 5565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:38.868866', 'step': 5565, 'epoch': 3} {'type': 'loss', 'content': 9.81853881967254e-05, 'timestamp': '2025-09-10 02:31:38.870909', 'step': 5566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.899810', 'step': 5566, 'epoch': 3} {'type': 'loss', 'content': 0.013567962683737278, 'timestamp': '2025-09-10 02:31:38.901709', 'step': 5567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.930354', 'step': 5567, 'epoch': 3} {'type': 'loss', 'content': 0.00038412483991123736, 'timestamp': '2025-09-10 02:31:38.953775', 'step': 5568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:38.982570', 'step': 5568, 'epoch': 3} {'type': 'loss', 'content': 0.0004490498104132712, 'timestamp': '2025-09-10 02:31:38.984379', 'step': 5569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.013393', 'step': 5569, 'epoch': 3} {'type': 'loss', 'content': 0.0004223698633722961, 'timestamp': '2025-09-10 02:31:39.015470', 'step': 5570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.044805', 'step': 5570, 'epoch': 3} {'type': 'loss', 'content': 0.004776694346219301, 'timestamp': '2025-09-10 02:31:39.046909', 'step': 5571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.075707', 'step': 5571, 'epoch': 3} {'type': 'loss', 'content': 0.00011601136066019535, 'timestamp': '2025-09-10 02:31:39.099137', 'step': 5572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.128263', 'step': 5572, 'epoch': 3} {'type': 'loss', 'content': 0.00019144189718645066, 'timestamp': '2025-09-10 02:31:39.130070', 'step': 5573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:39.158850', 'step': 5573, 'epoch': 3} {'type': 'loss', 'content': 0.007319327909499407, 'timestamp': '2025-09-10 02:31:39.160747', 'step': 5574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:39.189698', 'step': 5574, 'epoch': 3} {'type': 'loss', 'content': 0.0020018171053379774, 'timestamp': '2025-09-10 02:31:39.191672', 'step': 5575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:39.220346', 'step': 5575, 'epoch': 3} {'type': 'loss', 'content': 0.00011518342944327742, 'timestamp': '2025-09-10 02:31:39.243778', 'step': 5576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.272722', 'step': 5576, 'epoch': 3} {'type': 'loss', 'content': 0.0025209735613316298, 'timestamp': '2025-09-10 02:31:39.274906', 'step': 5577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.303887', 'step': 5577, 'epoch': 3} {'type': 'loss', 'content': 0.0043420311994850636, 'timestamp': '2025-09-10 02:31:39.306238', 'step': 5578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.335720', 'step': 5578, 'epoch': 3} {'type': 'loss', 'content': 0.0002620189916342497, 'timestamp': '2025-09-10 02:31:39.337796', 'step': 5579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.366805', 'step': 5579, 'epoch': 3} {'type': 'loss', 'content': 0.008588447235524654, 'timestamp': '2025-09-10 02:31:39.390180', 'step': 5580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:39.420049', 'step': 5580, 'epoch': 3} {'type': 'loss', 'content': 0.000151693748193793, 'timestamp': '2025-09-10 02:31:39.421890', 'step': 5581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.450942', 'step': 5581, 'epoch': 3} {'type': 'loss', 'content': 0.00013074011076241732, 'timestamp': '2025-09-10 02:31:39.452832', 'step': 5582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.481813', 'step': 5582, 'epoch': 3} {'type': 'loss', 'content': 0.0018191634444519877, 'timestamp': '2025-09-10 02:31:39.483876', 'step': 5583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.512982', 'step': 5583, 'epoch': 3} {'type': 'loss', 'content': 5.886574581381865e-05, 'timestamp': '2025-09-10 02:31:39.536497', 'step': 5584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.565344', 'step': 5584, 'epoch': 3} {'type': 'loss', 'content': 0.000427374237915501, 'timestamp': '2025-09-10 02:31:39.567419', 'step': 5585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.596530', 'step': 5585, 'epoch': 3} {'type': 'loss', 'content': 0.00014232970715966076, 'timestamp': '2025-09-10 02:31:39.598484', 'step': 5586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:39.627055', 'step': 5586, 'epoch': 3} {'type': 'loss', 'content': 0.0010161589598283172, 'timestamp': '2025-09-10 02:31:39.629116', 'step': 5587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.657863', 'step': 5587, 'epoch': 3} {'type': 'loss', 'content': 0.004650202579796314, 'timestamp': '2025-09-10 02:31:39.681381', 'step': 5588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.709693', 'step': 5588, 'epoch': 3} {'type': 'loss', 'content': 0.0057308003306388855, 'timestamp': '2025-09-10 02:31:39.711798', 'step': 5589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.740561', 'step': 5589, 'epoch': 3} {'type': 'loss', 'content': 0.0004541428934317082, 'timestamp': '2025-09-10 02:31:39.742519', 'step': 5590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.771333', 'step': 5590, 'epoch': 3} {'type': 'loss', 'content': 4.5271222916198894e-05, 'timestamp': '2025-09-10 02:31:39.773370', 'step': 5591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:39.802756', 'step': 5591, 'epoch': 3} {'type': 'loss', 'content': 0.014283773489296436, 'timestamp': '2025-09-10 02:31:39.826378', 'step': 5592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:39.855303', 'step': 5592, 'epoch': 3} {'type': 'loss', 'content': 0.0002525447343941778, 'timestamp': '2025-09-10 02:31:39.857233', 'step': 5593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.886053', 'step': 5593, 'epoch': 3} {'type': 'loss', 'content': 0.0008836208726279438, 'timestamp': '2025-09-10 02:31:39.887931', 'step': 5594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:39.916544', 'step': 5594, 'epoch': 3} {'type': 'loss', 'content': 0.00010469827248016372, 'timestamp': '2025-09-10 02:31:39.919956', 'step': 5595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:39.951866', 'step': 5595, 'epoch': 3} {'type': 'loss', 'content': 0.00025571396690793335, 'timestamp': '2025-09-10 02:31:39.975328', 'step': 5596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.005137', 'step': 5596, 'epoch': 3} {'type': 'loss', 'content': 0.0001397687301505357, 'timestamp': '2025-09-10 02:31:40.008163', 'step': 5597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.038157', 'step': 5597, 'epoch': 3} {'type': 'loss', 'content': 0.012690062634646893, 'timestamp': '2025-09-10 02:31:40.040057', 'step': 5598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.068691', 'step': 5598, 'epoch': 3} {'type': 'loss', 'content': 0.0011016997741535306, 'timestamp': '2025-09-10 02:31:40.070526', 'step': 5599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.099241', 'step': 5599, 'epoch': 3} {'type': 'loss', 'content': 0.0002454652276355773, 'timestamp': '2025-09-10 02:31:40.122630', 'step': 5600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.151811', 'step': 5600, 'epoch': 3} {'type': 'loss', 'content': 0.021332427859306335, 'timestamp': '2025-09-10 02:31:40.153592', 'step': 5601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.182392', 'step': 5601, 'epoch': 3} {'type': 'loss', 'content': 0.00016384133778046817, 'timestamp': '2025-09-10 02:31:40.184137', 'step': 5602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.212691', 'step': 5602, 'epoch': 3} {'type': 'loss', 'content': 7.321812881855294e-05, 'timestamp': '2025-09-10 02:31:40.214781', 'step': 5603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.243470', 'step': 5603, 'epoch': 3} {'type': 'loss', 'content': 0.0007486808462999761, 'timestamp': '2025-09-10 02:31:40.266794', 'step': 5604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.295633', 'step': 5604, 'epoch': 3} {'type': 'loss', 'content': 3.3376003557350487e-05, 'timestamp': '2025-09-10 02:31:40.297563', 'step': 5605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.326652', 'step': 5605, 'epoch': 3} {'type': 'loss', 'content': 0.0007316760602407157, 'timestamp': '2025-09-10 02:31:40.328577', 'step': 5606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.357520', 'step': 5606, 'epoch': 3} {'type': 'loss', 'content': 0.0020737769082188606, 'timestamp': '2025-09-10 02:31:40.359640', 'step': 5607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.388693', 'step': 5607, 'epoch': 3} {'type': 'loss', 'content': 0.00014511286281049252, 'timestamp': '2025-09-10 02:31:40.412316', 'step': 5608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.441442', 'step': 5608, 'epoch': 3} {'type': 'loss', 'content': 0.0005348823615349829, 'timestamp': '2025-09-10 02:31:40.443377', 'step': 5609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:40.472212', 'step': 5609, 'epoch': 3} {'type': 'loss', 'content': 0.00016290562052745372, 'timestamp': '2025-09-10 02:31:40.474036', 'step': 5610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.503205', 'step': 5610, 'epoch': 3} {'type': 'loss', 'content': 0.0005925578298047185, 'timestamp': '2025-09-10 02:31:40.505223', 'step': 5611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.533963', 'step': 5611, 'epoch': 3} {'type': 'loss', 'content': 7.499481580452994e-05, 'timestamp': '2025-09-10 02:31:40.557309', 'step': 5612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:40.587260', 'step': 5612, 'epoch': 3} {'type': 'loss', 'content': 0.00024712338927201927, 'timestamp': '2025-09-10 02:31:40.589287', 'step': 5613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.618337', 'step': 5613, 'epoch': 3} {'type': 'loss', 'content': 7.08239313098602e-05, 'timestamp': '2025-09-10 02:31:40.620209', 'step': 5614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.649286', 'step': 5614, 'epoch': 3} {'type': 'loss', 'content': 0.00010430154361529276, 'timestamp': '2025-09-10 02:31:40.651104', 'step': 5615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.679819', 'step': 5615, 'epoch': 3} {'type': 'loss', 'content': 0.002154013141989708, 'timestamp': '2025-09-10 02:31:40.703579', 'step': 5616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.732286', 'step': 5616, 'epoch': 3} {'type': 'loss', 'content': 0.007276777643710375, 'timestamp': '2025-09-10 02:31:40.734264', 'step': 5617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.762943', 'step': 5617, 'epoch': 3} {'type': 'loss', 'content': 0.00033290876308456063, 'timestamp': '2025-09-10 02:31:40.764602', 'step': 5618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.793096', 'step': 5618, 'epoch': 3} {'type': 'loss', 'content': 8.302512287627906e-05, 'timestamp': '2025-09-10 02:31:40.794950', 'step': 5619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.823374', 'step': 5619, 'epoch': 3} {'type': 'loss', 'content': 0.05603466182947159, 'timestamp': '2025-09-10 02:31:40.846962', 'step': 5620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.875806', 'step': 5620, 'epoch': 3} {'type': 'loss', 'content': 0.0015819476684555411, 'timestamp': '2025-09-10 02:31:40.878005', 'step': 5621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:40.906769', 'step': 5621, 'epoch': 3} {'type': 'loss', 'content': 0.000151911357534118, 'timestamp': '2025-09-10 02:31:40.908671', 'step': 5622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.936861', 'step': 5622, 'epoch': 3} {'type': 'loss', 'content': 0.04918529838323593, 'timestamp': '2025-09-10 02:31:40.938667', 'step': 5623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:40.967213', 'step': 5623, 'epoch': 3} {'type': 'loss', 'content': 0.00021279227803461254, 'timestamp': '2025-09-10 02:31:40.990567', 'step': 5624, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:31:42.843387', 'step': 5624, 'epoch': 3} {'type': 'pplx', 'content': 2814016.08539282, 'timestamp': '2025-09-10 02:31:42.845309', 'step': 5624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:42.874693', 'step': 5624, 'epoch': 3} {'type': 'loss', 'content': 0.00011733026622096077, 'timestamp': '2025-09-10 02:31:42.876852', 'step': 5625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:42.905969', 'step': 5625, 'epoch': 3} {'type': 'loss', 'content': 7.083137461449951e-05, 'timestamp': '2025-09-10 02:31:42.907990', 'step': 5626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:42.936869', 'step': 5626, 'epoch': 3} {'type': 'loss', 'content': 0.0006454582908190787, 'timestamp': '2025-09-10 02:31:42.938835', 'step': 5627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:42.968156', 'step': 5627, 'epoch': 3} {'type': 'loss', 'content': 8.90479568624869e-05, 'timestamp': '2025-09-10 02:31:42.991841', 'step': 5628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.021223', 'step': 5628, 'epoch': 3} {'type': 'loss', 'content': 0.0027883213479071856, 'timestamp': '2025-09-10 02:31:43.023319', 'step': 5629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.052636', 'step': 5629, 'epoch': 3} {'type': 'loss', 'content': 0.023280059918761253, 'timestamp': '2025-09-10 02:31:43.054710', 'step': 5630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.084362', 'step': 5630, 'epoch': 3} {'type': 'loss', 'content': 0.0001530003355583176, 'timestamp': '2025-09-10 02:31:43.086365', 'step': 5631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.115704', 'step': 5631, 'epoch': 3} {'type': 'loss', 'content': 0.0004718450072687119, 'timestamp': '2025-09-10 02:31:43.139178', 'step': 5632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.168697', 'step': 5632, 'epoch': 3} {'type': 'loss', 'content': 0.0001157821825472638, 'timestamp': '2025-09-10 02:31:43.170735', 'step': 5633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.199701', 'step': 5633, 'epoch': 3} {'type': 'loss', 'content': 0.00031720413244329393, 'timestamp': '2025-09-10 02:31:43.201518', 'step': 5634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.230866', 'step': 5634, 'epoch': 3} {'type': 'loss', 'content': 0.0022304304875433445, 'timestamp': '2025-09-10 02:31:43.232883', 'step': 5635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.262239', 'step': 5635, 'epoch': 3} {'type': 'loss', 'content': 0.00013950928405392915, 'timestamp': '2025-09-10 02:31:43.285561', 'step': 5636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.314389', 'step': 5636, 'epoch': 3} {'type': 'loss', 'content': 0.0002411556924926117, 'timestamp': '2025-09-10 02:31:43.316349', 'step': 5637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.345270', 'step': 5637, 'epoch': 3} {'type': 'loss', 'content': 0.0008664571796543896, 'timestamp': '2025-09-10 02:31:43.347009', 'step': 5638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.376272', 'step': 5638, 'epoch': 3} {'type': 'loss', 'content': 8.972598152467981e-05, 'timestamp': '2025-09-10 02:31:43.378122', 'step': 5639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.407095', 'step': 5639, 'epoch': 3} {'type': 'loss', 'content': 0.0002920062397606671, 'timestamp': '2025-09-10 02:31:43.430530', 'step': 5640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.459457', 'step': 5640, 'epoch': 3} {'type': 'loss', 'content': 0.00020197234698571265, 'timestamp': '2025-09-10 02:31:43.461268', 'step': 5641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.490235', 'step': 5641, 'epoch': 3} {'type': 'loss', 'content': 8.653556142235175e-05, 'timestamp': '2025-09-10 02:31:43.492030', 'step': 5642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.520830', 'step': 5642, 'epoch': 3} {'type': 'loss', 'content': 0.08047207444906235, 'timestamp': '2025-09-10 02:31:43.523142', 'step': 5643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.552596', 'step': 5643, 'epoch': 3} {'type': 'loss', 'content': 0.00019806034106295556, 'timestamp': '2025-09-10 02:31:43.576175', 'step': 5644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.610721', 'step': 5644, 'epoch': 3} {'type': 'loss', 'content': 8.005097333807498e-05, 'timestamp': '2025-09-10 02:31:43.612601', 'step': 5645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.645544', 'step': 5645, 'epoch': 3} {'type': 'loss', 'content': 8.392855670535937e-05, 'timestamp': '2025-09-10 02:31:43.647602', 'step': 5646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.686377', 'step': 5646, 'epoch': 3} {'type': 'loss', 'content': 0.0001441091444576159, 'timestamp': '2025-09-10 02:31:43.688549', 'step': 5647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.726579', 'step': 5647, 'epoch': 3} {'type': 'loss', 'content': 0.0011947358725592494, 'timestamp': '2025-09-10 02:31:43.750139', 'step': 5648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.786021', 'step': 5648, 'epoch': 3} {'type': 'loss', 'content': 0.00032596822711639106, 'timestamp': '2025-09-10 02:31:43.787978', 'step': 5649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:43.826629', 'step': 5649, 'epoch': 3} {'type': 'loss', 'content': 0.00025973832816816866, 'timestamp': '2025-09-10 02:31:43.828517', 'step': 5650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.857307', 'step': 5650, 'epoch': 3} {'type': 'loss', 'content': 0.04941494017839432, 'timestamp': '2025-09-10 02:31:43.859295', 'step': 5651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.888495', 'step': 5651, 'epoch': 3} {'type': 'loss', 'content': 0.0008920299587771297, 'timestamp': '2025-09-10 02:31:43.911974', 'step': 5652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:43.940232', 'step': 5652, 'epoch': 3} {'type': 'loss', 'content': 0.0006275326013565063, 'timestamp': '2025-09-10 02:31:43.942024', 'step': 5653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:43.970685', 'step': 5653, 'epoch': 3} {'type': 'loss', 'content': 0.0006861002766527236, 'timestamp': '2025-09-10 02:31:43.972663', 'step': 5654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.001443', 'step': 5654, 'epoch': 3} {'type': 'loss', 'content': 0.010110259056091309, 'timestamp': '2025-09-10 02:31:44.003243', 'step': 5655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.032526', 'step': 5655, 'epoch': 3} {'type': 'loss', 'content': 7.881731289671734e-05, 'timestamp': '2025-09-10 02:31:44.055964', 'step': 5656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.084872', 'step': 5656, 'epoch': 3} {'type': 'loss', 'content': 0.0004487546975724399, 'timestamp': '2025-09-10 02:31:44.087014', 'step': 5657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.115979', 'step': 5657, 'epoch': 3} {'type': 'loss', 'content': 0.000640097598079592, 'timestamp': '2025-09-10 02:31:44.118054', 'step': 5658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.147238', 'step': 5658, 'epoch': 3} {'type': 'loss', 'content': 0.00021092274982947856, 'timestamp': '2025-09-10 02:31:44.149303', 'step': 5659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.178211', 'step': 5659, 'epoch': 3} {'type': 'loss', 'content': 0.0008118998957797885, 'timestamp': '2025-09-10 02:31:44.201546', 'step': 5660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.237925', 'step': 5660, 'epoch': 3} {'type': 'loss', 'content': 0.015752341598272324, 'timestamp': '2025-09-10 02:31:44.239656', 'step': 5661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.267927', 'step': 5661, 'epoch': 3} {'type': 'loss', 'content': 0.00528930826112628, 'timestamp': '2025-09-10 02:31:44.269892', 'step': 5662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.299095', 'step': 5662, 'epoch': 3} {'type': 'loss', 'content': 0.0009915790287777781, 'timestamp': '2025-09-10 02:31:44.300867', 'step': 5663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:44.329553', 'step': 5663, 'epoch': 3} {'type': 'loss', 'content': 0.0002180993469664827, 'timestamp': '2025-09-10 02:31:44.352786', 'step': 5664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:44.382266', 'step': 5664, 'epoch': 3} {'type': 'loss', 'content': 0.00041963360854424536, 'timestamp': '2025-09-10 02:31:44.384246', 'step': 5665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.412826', 'step': 5665, 'epoch': 3} {'type': 'loss', 'content': 0.0043303873389959335, 'timestamp': '2025-09-10 02:31:44.414857', 'step': 5666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.443701', 'step': 5666, 'epoch': 3} {'type': 'loss', 'content': 0.0033870781771838665, 'timestamp': '2025-09-10 02:31:44.445414', 'step': 5667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.473507', 'step': 5667, 'epoch': 3} {'type': 'loss', 'content': 6.146782106952742e-05, 'timestamp': '2025-09-10 02:31:44.496975', 'step': 5668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:44.526086', 'step': 5668, 'epoch': 3} {'type': 'loss', 'content': 0.0021099084988236427, 'timestamp': '2025-09-10 02:31:44.528000', 'step': 5669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.556909', 'step': 5669, 'epoch': 3} {'type': 'loss', 'content': 0.0007611610344611108, 'timestamp': '2025-09-10 02:31:44.558660', 'step': 5670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:44.587221', 'step': 5670, 'epoch': 3} {'type': 'loss', 'content': 0.0006143067148514092, 'timestamp': '2025-09-10 02:31:44.588982', 'step': 5671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.617265', 'step': 5671, 'epoch': 3} {'type': 'loss', 'content': 0.0037563766818493605, 'timestamp': '2025-09-10 02:31:44.640788', 'step': 5672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.669996', 'step': 5672, 'epoch': 3} {'type': 'loss', 'content': 8.10855271993205e-05, 'timestamp': '2025-09-10 02:31:44.671838', 'step': 5673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.700530', 'step': 5673, 'epoch': 3} {'type': 'loss', 'content': 0.00016959061031229794, 'timestamp': '2025-09-10 02:31:44.702508', 'step': 5674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:44.730994', 'step': 5674, 'epoch': 3} {'type': 'loss', 'content': 0.005033043213188648, 'timestamp': '2025-09-10 02:31:44.732701', 'step': 5675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.760959', 'step': 5675, 'epoch': 3} {'type': 'loss', 'content': 0.0006991572445258498, 'timestamp': '2025-09-10 02:31:44.784300', 'step': 5676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.813237', 'step': 5676, 'epoch': 3} {'type': 'loss', 'content': 0.00017229022341780365, 'timestamp': '2025-09-10 02:31:44.815200', 'step': 5677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:44.844700', 'step': 5677, 'epoch': 3} {'type': 'loss', 'content': 0.05727619677782059, 'timestamp': '2025-09-10 02:31:44.846615', 'step': 5678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.875188', 'step': 5678, 'epoch': 3} {'type': 'loss', 'content': 0.0001972148020286113, 'timestamp': '2025-09-10 02:31:44.877152', 'step': 5679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.905222', 'step': 5679, 'epoch': 3} {'type': 'loss', 'content': 0.00023282249458134174, 'timestamp': '2025-09-10 02:31:44.928638', 'step': 5680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.957941', 'step': 5680, 'epoch': 3} {'type': 'loss', 'content': 0.007196842692792416, 'timestamp': '2025-09-10 02:31:44.959819', 'step': 5681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:44.988732', 'step': 5681, 'epoch': 3} {'type': 'loss', 'content': 0.00014656053099315614, 'timestamp': '2025-09-10 02:31:44.990476', 'step': 5682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.020624', 'step': 5682, 'epoch': 3} {'type': 'loss', 'content': 0.00015152478590607643, 'timestamp': '2025-09-10 02:31:45.022868', 'step': 5683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.052195', 'step': 5683, 'epoch': 3} {'type': 'loss', 'content': 0.0005283069331198931, 'timestamp': '2025-09-10 02:31:45.075387', 'step': 5684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.104863', 'step': 5684, 'epoch': 3} {'type': 'loss', 'content': 0.003561926307156682, 'timestamp': '2025-09-10 02:31:45.106557', 'step': 5685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.135150', 'step': 5685, 'epoch': 3} {'type': 'loss', 'content': 0.002516575623303652, 'timestamp': '2025-09-10 02:31:45.137208', 'step': 5686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.167014', 'step': 5686, 'epoch': 3} {'type': 'loss', 'content': 8.199255535146222e-05, 'timestamp': '2025-09-10 02:31:45.168861', 'step': 5687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.200179', 'step': 5687, 'epoch': 3} {'type': 'loss', 'content': 5.475278521771543e-05, 'timestamp': '2025-09-10 02:31:45.223740', 'step': 5688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.256873', 'step': 5688, 'epoch': 3} {'type': 'loss', 'content': 0.000620243779849261, 'timestamp': '2025-09-10 02:31:45.259035', 'step': 5689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.287935', 'step': 5689, 'epoch': 3} {'type': 'loss', 'content': 0.0006482009775936604, 'timestamp': '2025-09-10 02:31:45.290037', 'step': 5690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.318725', 'step': 5690, 'epoch': 3} {'type': 'loss', 'content': 6.253737228689715e-05, 'timestamp': '2025-09-10 02:31:45.320919', 'step': 5691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.353501', 'step': 5691, 'epoch': 3} {'type': 'loss', 'content': 0.00028412532992661, 'timestamp': '2025-09-10 02:31:45.376964', 'step': 5692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.410709', 'step': 5692, 'epoch': 3} {'type': 'loss', 'content': 0.0006797489477321506, 'timestamp': '2025-09-10 02:31:45.412658', 'step': 5693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.443951', 'step': 5693, 'epoch': 3} {'type': 'loss', 'content': 0.00047161945258267224, 'timestamp': '2025-09-10 02:31:45.445822', 'step': 5694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.477767', 'step': 5694, 'epoch': 3} {'type': 'loss', 'content': 7.504104723921046e-05, 'timestamp': '2025-09-10 02:31:45.479546', 'step': 5695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.513638', 'step': 5695, 'epoch': 3} {'type': 'loss', 'content': 8.778285700827837e-05, 'timestamp': '2025-09-10 02:31:45.537138', 'step': 5696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.569648', 'step': 5696, 'epoch': 3} {'type': 'loss', 'content': 0.009277158416807652, 'timestamp': '2025-09-10 02:31:45.571474', 'step': 5697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.605182', 'step': 5697, 'epoch': 3} {'type': 'loss', 'content': 0.0006705053965561092, 'timestamp': '2025-09-10 02:31:45.607223', 'step': 5698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.641179', 'step': 5698, 'epoch': 3} {'type': 'loss', 'content': 0.0013212297344580293, 'timestamp': '2025-09-10 02:31:45.643176', 'step': 5699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.677485', 'step': 5699, 'epoch': 3} {'type': 'loss', 'content': 0.00017160980496555567, 'timestamp': '2025-09-10 02:31:45.701106', 'step': 5700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:45.738028', 'step': 5700, 'epoch': 3} {'type': 'loss', 'content': 0.00633283331990242, 'timestamp': '2025-09-10 02:31:45.740062', 'step': 5701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.774151', 'step': 5701, 'epoch': 3} {'type': 'loss', 'content': 0.012610764242708683, 'timestamp': '2025-09-10 02:31:45.776272', 'step': 5702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.813812', 'step': 5702, 'epoch': 3} {'type': 'loss', 'content': 0.07728030532598495, 'timestamp': '2025-09-10 02:31:45.815562', 'step': 5703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:45.845637', 'step': 5703, 'epoch': 3} {'type': 'loss', 'content': 0.004781925585120916, 'timestamp': '2025-09-10 02:31:45.868996', 'step': 5704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.898348', 'step': 5704, 'epoch': 3} {'type': 'loss', 'content': 0.04369015619158745, 'timestamp': '2025-09-10 02:31:45.900386', 'step': 5705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.928989', 'step': 5705, 'epoch': 3} {'type': 'loss', 'content': 0.00011943600111408159, 'timestamp': '2025-09-10 02:31:45.930951', 'step': 5706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.959727', 'step': 5706, 'epoch': 3} {'type': 'loss', 'content': 0.0006643111119046807, 'timestamp': '2025-09-10 02:31:45.961777', 'step': 5707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:45.990230', 'step': 5707, 'epoch': 3} {'type': 'loss', 'content': 0.00022974215971771628, 'timestamp': '2025-09-10 02:31:46.013777', 'step': 5708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.042707', 'step': 5708, 'epoch': 3} {'type': 'loss', 'content': 0.0005465334397740662, 'timestamp': '2025-09-10 02:31:46.044668', 'step': 5709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.074526', 'step': 5709, 'epoch': 3} {'type': 'loss', 'content': 0.0023475922644138336, 'timestamp': '2025-09-10 02:31:46.076388', 'step': 5710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.105245', 'step': 5710, 'epoch': 3} {'type': 'loss', 'content': 0.00036916168755851686, 'timestamp': '2025-09-10 02:31:46.107055', 'step': 5711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.135601', 'step': 5711, 'epoch': 3} {'type': 'loss', 'content': 0.0005269505199976265, 'timestamp': '2025-09-10 02:31:46.159024', 'step': 5712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.188184', 'step': 5712, 'epoch': 3} {'type': 'loss', 'content': 0.008309791795909405, 'timestamp': '2025-09-10 02:31:46.190126', 'step': 5713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.218870', 'step': 5713, 'epoch': 3} {'type': 'loss', 'content': 0.000230509860557504, 'timestamp': '2025-09-10 02:31:46.220536', 'step': 5714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.249095', 'step': 5714, 'epoch': 3} {'type': 'loss', 'content': 0.00010207213199464604, 'timestamp': '2025-09-10 02:31:46.251119', 'step': 5715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.279905', 'step': 5715, 'epoch': 3} {'type': 'loss', 'content': 0.0008101228740997612, 'timestamp': '2025-09-10 02:31:46.303302', 'step': 5716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:46.332126', 'step': 5716, 'epoch': 3} {'type': 'loss', 'content': 0.00028212874894961715, 'timestamp': '2025-09-10 02:31:46.334070', 'step': 5717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.362881', 'step': 5717, 'epoch': 3} {'type': 'loss', 'content': 0.007300230674445629, 'timestamp': '2025-09-10 02:31:46.364600', 'step': 5718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.393334', 'step': 5718, 'epoch': 3} {'type': 'loss', 'content': 0.007928567007184029, 'timestamp': '2025-09-10 02:31:46.395258', 'step': 5719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.424430', 'step': 5719, 'epoch': 3} {'type': 'loss', 'content': 0.0006109363748691976, 'timestamp': '2025-09-10 02:31:46.447983', 'step': 5720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.477844', 'step': 5720, 'epoch': 3} {'type': 'loss', 'content': 0.0002473728090990335, 'timestamp': '2025-09-10 02:31:46.479678', 'step': 5721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.508465', 'step': 5721, 'epoch': 3} {'type': 'loss', 'content': 0.0004756476264446974, 'timestamp': '2025-09-10 02:31:46.510195', 'step': 5722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.539048', 'step': 5722, 'epoch': 3} {'type': 'loss', 'content': 0.0001675796665949747, 'timestamp': '2025-09-10 02:31:46.541145', 'step': 5723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.570097', 'step': 5723, 'epoch': 3} {'type': 'loss', 'content': 0.0015109594678506255, 'timestamp': '2025-09-10 02:31:46.593780', 'step': 5724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:46.623889', 'step': 5724, 'epoch': 3} {'type': 'loss', 'content': 0.0002595992118585855, 'timestamp': '2025-09-10 02:31:46.625533', 'step': 5725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.654084', 'step': 5725, 'epoch': 3} {'type': 'loss', 'content': 8.602546586189419e-05, 'timestamp': '2025-09-10 02:31:46.656194', 'step': 5726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.685045', 'step': 5726, 'epoch': 3} {'type': 'loss', 'content': 0.0002019908424699679, 'timestamp': '2025-09-10 02:31:46.686852', 'step': 5727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.715506', 'step': 5727, 'epoch': 3} {'type': 'loss', 'content': 0.0012018673587590456, 'timestamp': '2025-09-10 02:31:46.738983', 'step': 5728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.767808', 'step': 5728, 'epoch': 3} {'type': 'loss', 'content': 0.03411541134119034, 'timestamp': '2025-09-10 02:31:46.769561', 'step': 5729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.798383', 'step': 5729, 'epoch': 3} {'type': 'loss', 'content': 0.03333604335784912, 'timestamp': '2025-09-10 02:31:46.800214', 'step': 5730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.829696', 'step': 5730, 'epoch': 3} {'type': 'loss', 'content': 0.0006598546169698238, 'timestamp': '2025-09-10 02:31:46.831891', 'step': 5731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:46.861057', 'step': 5731, 'epoch': 3} {'type': 'loss', 'content': 0.0002100792044075206, 'timestamp': '2025-09-10 02:31:46.884304', 'step': 5732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.913193', 'step': 5732, 'epoch': 3} {'type': 'loss', 'content': 0.0016376635758206248, 'timestamp': '2025-09-10 02:31:46.914935', 'step': 5733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:46.943696', 'step': 5733, 'epoch': 3} {'type': 'loss', 'content': 0.0001775900600478053, 'timestamp': '2025-09-10 02:31:46.945537', 'step': 5734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:46.974148', 'step': 5734, 'epoch': 3} {'type': 'loss', 'content': 0.0016689964104443789, 'timestamp': '2025-09-10 02:31:46.976102', 'step': 5735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.005222', 'step': 5735, 'epoch': 3} {'type': 'loss', 'content': 0.00017439691873732954, 'timestamp': '2025-09-10 02:31:47.028498', 'step': 5736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.057064', 'step': 5736, 'epoch': 3} {'type': 'loss', 'content': 0.00033982301829382777, 'timestamp': '2025-09-10 02:31:47.058925', 'step': 5737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.087924', 'step': 5737, 'epoch': 3} {'type': 'loss', 'content': 0.0007048108382150531, 'timestamp': '2025-09-10 02:31:47.089932', 'step': 5738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.118528', 'step': 5738, 'epoch': 3} {'type': 'loss', 'content': 0.0036625401116907597, 'timestamp': '2025-09-10 02:31:47.120499', 'step': 5739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.149082', 'step': 5739, 'epoch': 3} {'type': 'loss', 'content': 0.0017860140651464462, 'timestamp': '2025-09-10 02:31:47.172395', 'step': 5740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.201947', 'step': 5740, 'epoch': 3} {'type': 'loss', 'content': 0.027387376874685287, 'timestamp': '2025-09-10 02:31:47.203784', 'step': 5741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.234056', 'step': 5741, 'epoch': 3} {'type': 'loss', 'content': 0.01553224865347147, 'timestamp': '2025-09-10 02:31:47.235775', 'step': 5742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.267071', 'step': 5742, 'epoch': 3} {'type': 'loss', 'content': 0.006673471070826054, 'timestamp': '2025-09-10 02:31:47.268720', 'step': 5743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.297478', 'step': 5743, 'epoch': 3} {'type': 'loss', 'content': 0.00022507687390316278, 'timestamp': '2025-09-10 02:31:47.320983', 'step': 5744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:47.353129', 'step': 5744, 'epoch': 3} {'type': 'loss', 'content': 0.00032787161762826145, 'timestamp': '2025-09-10 02:31:47.354960', 'step': 5745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.386566', 'step': 5745, 'epoch': 3} {'type': 'loss', 'content': 0.00011085504229413345, 'timestamp': '2025-09-10 02:31:47.388558', 'step': 5746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.422358', 'step': 5746, 'epoch': 3} {'type': 'loss', 'content': 0.0011449556332081556, 'timestamp': '2025-09-10 02:31:47.424370', 'step': 5747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.454433', 'step': 5747, 'epoch': 3} {'type': 'loss', 'content': 0.0012391246855258942, 'timestamp': '2025-09-10 02:31:47.477679', 'step': 5748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:47.512361', 'step': 5748, 'epoch': 3} {'type': 'loss', 'content': 0.036838971078395844, 'timestamp': '2025-09-10 02:31:47.514148', 'step': 5749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.544998', 'step': 5749, 'epoch': 3} {'type': 'loss', 'content': 0.00018976135470438749, 'timestamp': '2025-09-10 02:31:47.546875', 'step': 5750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.582705', 'step': 5750, 'epoch': 3} {'type': 'loss', 'content': 0.002059570513665676, 'timestamp': '2025-09-10 02:31:47.584587', 'step': 5751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.617860', 'step': 5751, 'epoch': 3} {'type': 'loss', 'content': 0.005095013417303562, 'timestamp': '2025-09-10 02:31:47.641312', 'step': 5752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.673022', 'step': 5752, 'epoch': 3} {'type': 'loss', 'content': 0.002301124855875969, 'timestamp': '2025-09-10 02:31:47.674787', 'step': 5753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.712711', 'step': 5753, 'epoch': 3} {'type': 'loss', 'content': 7.774233381496742e-05, 'timestamp': '2025-09-10 02:31:47.714989', 'step': 5754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.752301', 'step': 5754, 'epoch': 3} {'type': 'loss', 'content': 0.00024010757624637336, 'timestamp': '2025-09-10 02:31:47.754390', 'step': 5755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.791725', 'step': 5755, 'epoch': 3} {'type': 'loss', 'content': 0.0002837387437466532, 'timestamp': '2025-09-10 02:31:47.815135', 'step': 5756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.845576', 'step': 5756, 'epoch': 3} {'type': 'loss', 'content': 0.0004090330039616674, 'timestamp': '2025-09-10 02:31:47.847474', 'step': 5757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.876408', 'step': 5757, 'epoch': 3} {'type': 'loss', 'content': 0.00031902006594464183, 'timestamp': '2025-09-10 02:31:47.878312', 'step': 5758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.907084', 'step': 5758, 'epoch': 3} {'type': 'loss', 'content': 0.004382619168609381, 'timestamp': '2025-09-10 02:31:47.909141', 'step': 5759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.937802', 'step': 5759, 'epoch': 3} {'type': 'loss', 'content': 0.08181292563676834, 'timestamp': '2025-09-10 02:31:47.961436', 'step': 5760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:47.990622', 'step': 5760, 'epoch': 3} {'type': 'loss', 'content': 0.0013247819151729345, 'timestamp': '2025-09-10 02:31:47.992426', 'step': 5761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.021128', 'step': 5761, 'epoch': 3} {'type': 'loss', 'content': 0.0007081844960339367, 'timestamp': '2025-09-10 02:31:48.022663', 'step': 5762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:31:48.051528', 'step': 5762, 'epoch': 3} {'type': 'loss', 'content': 0.0015095778508111835, 'timestamp': '2025-09-10 02:31:48.053937', 'step': 5763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.083710', 'step': 5763, 'epoch': 3} {'type': 'loss', 'content': 0.0020556601230055094, 'timestamp': '2025-09-10 02:31:48.107152', 'step': 5764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.136711', 'step': 5764, 'epoch': 3} {'type': 'loss', 'content': 0.00028491643024608493, 'timestamp': '2025-09-10 02:31:48.138518', 'step': 5765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.167250', 'step': 5765, 'epoch': 3} {'type': 'loss', 'content': 0.0002188169164583087, 'timestamp': '2025-09-10 02:31:48.168985', 'step': 5766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.197866', 'step': 5766, 'epoch': 3} {'type': 'loss', 'content': 0.04480767622590065, 'timestamp': '2025-09-10 02:31:48.199672', 'step': 5767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.228449', 'step': 5767, 'epoch': 3} {'type': 'loss', 'content': 0.00042242585914209485, 'timestamp': '2025-09-10 02:31:48.251728', 'step': 5768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.280885', 'step': 5768, 'epoch': 3} {'type': 'loss', 'content': 0.0026514700148254633, 'timestamp': '2025-09-10 02:31:48.282594', 'step': 5769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:48.311204', 'step': 5769, 'epoch': 3} {'type': 'loss', 'content': 0.012968815863132477, 'timestamp': '2025-09-10 02:31:48.312947', 'step': 5770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.341817', 'step': 5770, 'epoch': 3} {'type': 'loss', 'content': 0.04409560188651085, 'timestamp': '2025-09-10 02:31:48.343913', 'step': 5771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.374021', 'step': 5771, 'epoch': 3} {'type': 'loss', 'content': 0.00035515808849595487, 'timestamp': '2025-09-10 02:31:48.398433', 'step': 5772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.427376', 'step': 5772, 'epoch': 3} {'type': 'loss', 'content': 0.0007939469651319087, 'timestamp': '2025-09-10 02:31:48.429340', 'step': 5773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.458475', 'step': 5773, 'epoch': 3} {'type': 'loss', 'content': 0.00019278070249129087, 'timestamp': '2025-09-10 02:31:48.460554', 'step': 5774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.489718', 'step': 5774, 'epoch': 3} {'type': 'loss', 'content': 0.00019546682597137988, 'timestamp': '2025-09-10 02:31:48.491421', 'step': 5775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:48.520608', 'step': 5775, 'epoch': 3} {'type': 'loss', 'content': 0.0004952874151058495, 'timestamp': '2025-09-10 02:31:48.544045', 'step': 5776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:31:50.410059', 'step': 5776, 'epoch': 3} {'type': 'pplx', 'content': 2330602.4634110588, 'timestamp': '2025-09-10 02:31:50.411927', 'step': 5776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.440035', 'step': 5776, 'epoch': 3} {'type': 'loss', 'content': 0.0005920507828705013, 'timestamp': '2025-09-10 02:31:50.441848', 'step': 5777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.471248', 'step': 5777, 'epoch': 3} {'type': 'loss', 'content': 0.00026921770768240094, 'timestamp': '2025-09-10 02:31:50.472899', 'step': 5778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.501698', 'step': 5778, 'epoch': 3} {'type': 'loss', 'content': 0.0002104615414282307, 'timestamp': '2025-09-10 02:31:50.503654', 'step': 5779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.532767', 'step': 5779, 'epoch': 3} {'type': 'loss', 'content': 0.0003762553387787193, 'timestamp': '2025-09-10 02:31:50.557091', 'step': 5780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.587349', 'step': 5780, 'epoch': 3} {'type': 'loss', 'content': 0.0065718465484678745, 'timestamp': '2025-09-10 02:31:50.589917', 'step': 5781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.619518', 'step': 5781, 'epoch': 3} {'type': 'loss', 'content': 0.0004872330173384398, 'timestamp': '2025-09-10 02:31:50.622054', 'step': 5782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:50.651886', 'step': 5782, 'epoch': 3} {'type': 'loss', 'content': 0.0020292175468057394, 'timestamp': '2025-09-10 02:31:50.654172', 'step': 5783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.683412', 'step': 5783, 'epoch': 3} {'type': 'loss', 'content': 0.00045010444591753185, 'timestamp': '2025-09-10 02:31:50.706752', 'step': 5784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.736095', 'step': 5784, 'epoch': 3} {'type': 'loss', 'content': 0.002559373155236244, 'timestamp': '2025-09-10 02:31:50.737889', 'step': 5785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.766697', 'step': 5785, 'epoch': 3} {'type': 'loss', 'content': 0.0009514554985798895, 'timestamp': '2025-09-10 02:31:50.770075', 'step': 5786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.799853', 'step': 5786, 'epoch': 3} {'type': 'loss', 'content': 0.0012386254966259003, 'timestamp': '2025-09-10 02:31:50.801607', 'step': 5787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:50.830616', 'step': 5787, 'epoch': 3} {'type': 'loss', 'content': 0.0008899965905584395, 'timestamp': '2025-09-10 02:31:50.853985', 'step': 5788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.883341', 'step': 5788, 'epoch': 3} {'type': 'loss', 'content': 0.0013884452637284994, 'timestamp': '2025-09-10 02:31:50.885093', 'step': 5789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.913824', 'step': 5789, 'epoch': 3} {'type': 'loss', 'content': 0.0001731060619931668, 'timestamp': '2025-09-10 02:31:50.915673', 'step': 5790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.944182', 'step': 5790, 'epoch': 3} {'type': 'loss', 'content': 8.914165664464235e-05, 'timestamp': '2025-09-10 02:31:50.946125', 'step': 5791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:50.975025', 'step': 5791, 'epoch': 3} {'type': 'loss', 'content': 0.00023134097864385694, 'timestamp': '2025-09-10 02:31:50.998585', 'step': 5792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.027888', 'step': 5792, 'epoch': 3} {'type': 'loss', 'content': 0.0008638862636871636, 'timestamp': '2025-09-10 02:31:51.029779', 'step': 5793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.058969', 'step': 5793, 'epoch': 3} {'type': 'loss', 'content': 0.0007284631137736142, 'timestamp': '2025-09-10 02:31:51.060864', 'step': 5794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.090109', 'step': 5794, 'epoch': 3} {'type': 'loss', 'content': 0.0008212543907575309, 'timestamp': '2025-09-10 02:31:51.091969', 'step': 5795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.121331', 'step': 5795, 'epoch': 3} {'type': 'loss', 'content': 0.0005974514642730355, 'timestamp': '2025-09-10 02:31:51.144962', 'step': 5796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:51.175278', 'step': 5796, 'epoch': 3} {'type': 'loss', 'content': 0.00045890998444519937, 'timestamp': '2025-09-10 02:31:51.177124', 'step': 5797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.206090', 'step': 5797, 'epoch': 3} {'type': 'loss', 'content': 0.0006132670678198338, 'timestamp': '2025-09-10 02:31:51.208141', 'step': 5798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.241376', 'step': 5798, 'epoch': 3} {'type': 'loss', 'content': 0.006274589337408543, 'timestamp': '2025-09-10 02:31:51.243271', 'step': 5799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.276803', 'step': 5799, 'epoch': 3} {'type': 'loss', 'content': 0.0025721543934196234, 'timestamp': '2025-09-10 02:31:51.300187', 'step': 5800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.329637', 'step': 5800, 'epoch': 3} {'type': 'loss', 'content': 0.02713153138756752, 'timestamp': '2025-09-10 02:31:51.331631', 'step': 5801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.361569', 'step': 5801, 'epoch': 3} {'type': 'loss', 'content': 0.005381520371884108, 'timestamp': '2025-09-10 02:31:51.363456', 'step': 5802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.395179', 'step': 5802, 'epoch': 3} {'type': 'loss', 'content': 0.00021333516633603722, 'timestamp': '2025-09-10 02:31:51.397066', 'step': 5803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.432629', 'step': 5803, 'epoch': 3} {'type': 'loss', 'content': 0.013148345984518528, 'timestamp': '2025-09-10 02:31:51.455952', 'step': 5804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.493342', 'step': 5804, 'epoch': 3} {'type': 'loss', 'content': 0.0017936977092176676, 'timestamp': '2025-09-10 02:31:51.495445', 'step': 5805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.529144', 'step': 5805, 'epoch': 3} {'type': 'loss', 'content': 0.0025234627537429333, 'timestamp': '2025-09-10 02:31:51.530972', 'step': 5806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.564887', 'step': 5806, 'epoch': 3} {'type': 'loss', 'content': 0.029608607292175293, 'timestamp': '2025-09-10 02:31:51.566854', 'step': 5807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.601288', 'step': 5807, 'epoch': 3} {'type': 'loss', 'content': 0.0006367245805449784, 'timestamp': '2025-09-10 02:31:51.624639', 'step': 5808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.658145', 'step': 5808, 'epoch': 3} {'type': 'loss', 'content': 0.013967028819024563, 'timestamp': '2025-09-10 02:31:51.660111', 'step': 5809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.700282', 'step': 5809, 'epoch': 3} {'type': 'loss', 'content': 0.038395337760448456, 'timestamp': '2025-09-10 02:31:51.702246', 'step': 5810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.740215', 'step': 5810, 'epoch': 3} {'type': 'loss', 'content': 0.0006521128234453499, 'timestamp': '2025-09-10 02:31:51.742371', 'step': 5811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.777028', 'step': 5811, 'epoch': 3} {'type': 'loss', 'content': 0.0009065433405339718, 'timestamp': '2025-09-10 02:31:51.800571', 'step': 5812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.837874', 'step': 5812, 'epoch': 3} {'type': 'loss', 'content': 0.0004069570859428495, 'timestamp': '2025-09-10 02:31:51.839799', 'step': 5813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.868942', 'step': 5813, 'epoch': 3} {'type': 'loss', 'content': 0.0006121239275671542, 'timestamp': '2025-09-10 02:31:51.871505', 'step': 5814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.904491', 'step': 5814, 'epoch': 3} {'type': 'loss', 'content': 0.0018002677243202925, 'timestamp': '2025-09-10 02:31:51.906327', 'step': 5815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.936038', 'step': 5815, 'epoch': 3} {'type': 'loss', 'content': 0.0005376306944526732, 'timestamp': '2025-09-10 02:31:51.959424', 'step': 5816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:51.988676', 'step': 5816, 'epoch': 3} {'type': 'loss', 'content': 0.003357131266966462, 'timestamp': '2025-09-10 02:31:51.990791', 'step': 5817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.020505', 'step': 5817, 'epoch': 3} {'type': 'loss', 'content': 0.00049630954163149, 'timestamp': '2025-09-10 02:31:52.022567', 'step': 5818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.052230', 'step': 5818, 'epoch': 3} {'type': 'loss', 'content': 0.0072017270140349865, 'timestamp': '2025-09-10 02:31:52.054080', 'step': 5819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:52.084707', 'step': 5819, 'epoch': 3} {'type': 'loss', 'content': 0.0019593555480241776, 'timestamp': '2025-09-10 02:31:52.108299', 'step': 5820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:52.138735', 'step': 5820, 'epoch': 3} {'type': 'loss', 'content': 0.0036003238055855036, 'timestamp': '2025-09-10 02:31:52.140571', 'step': 5821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.169655', 'step': 5821, 'epoch': 3} {'type': 'loss', 'content': 0.00046660873340442777, 'timestamp': '2025-09-10 02:31:52.171643', 'step': 5822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.200737', 'step': 5822, 'epoch': 3} {'type': 'loss', 'content': 0.045283399522304535, 'timestamp': '2025-09-10 02:31:52.202591', 'step': 5823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.231705', 'step': 5823, 'epoch': 3} {'type': 'loss', 'content': 0.0009746053256094456, 'timestamp': '2025-09-10 02:31:52.255239', 'step': 5824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.285400', 'step': 5824, 'epoch': 3} {'type': 'loss', 'content': 0.0008497206727042794, 'timestamp': '2025-09-10 02:31:52.287238', 'step': 5825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.316933', 'step': 5825, 'epoch': 3} {'type': 'loss', 'content': 0.0004335683770477772, 'timestamp': '2025-09-10 02:31:52.319057', 'step': 5826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.348011', 'step': 5826, 'epoch': 3} {'type': 'loss', 'content': 0.0022922304924577475, 'timestamp': '2025-09-10 02:31:52.349935', 'step': 5827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.379966', 'step': 5827, 'epoch': 3} {'type': 'loss', 'content': 0.0004272214137017727, 'timestamp': '2025-09-10 02:31:52.404518', 'step': 5828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.434294', 'step': 5828, 'epoch': 3} {'type': 'loss', 'content': 0.009445090778172016, 'timestamp': '2025-09-10 02:31:52.436265', 'step': 5829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.466402', 'step': 5829, 'epoch': 3} {'type': 'loss', 'content': 0.00040190972504206, 'timestamp': '2025-09-10 02:31:52.468277', 'step': 5830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.497234', 'step': 5830, 'epoch': 3} {'type': 'loss', 'content': 0.0007058191695250571, 'timestamp': '2025-09-10 02:31:52.499275', 'step': 5831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.528593', 'step': 5831, 'epoch': 3} {'type': 'loss', 'content': 0.0005436336505226791, 'timestamp': '2025-09-10 02:31:52.551978', 'step': 5832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:31:52.581381', 'step': 5832, 'epoch': 3} {'type': 'loss', 'content': 0.0031013458501547575, 'timestamp': '2025-09-10 02:31:52.583328', 'step': 5833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.613344', 'step': 5833, 'epoch': 3} {'type': 'loss', 'content': 0.0003365006123203784, 'timestamp': '2025-09-10 02:31:52.615411', 'step': 5834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.645361', 'step': 5834, 'epoch': 3} {'type': 'loss', 'content': 0.002159892814233899, 'timestamp': '2025-09-10 02:31:52.647335', 'step': 5835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.676897', 'step': 5835, 'epoch': 3} {'type': 'loss', 'content': 0.001839548465795815, 'timestamp': '2025-09-10 02:31:52.700300', 'step': 5836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.730630', 'step': 5836, 'epoch': 3} {'type': 'loss', 'content': 0.012229649350047112, 'timestamp': '2025-09-10 02:31:52.732493', 'step': 5837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.761874', 'step': 5837, 'epoch': 3} {'type': 'loss', 'content': 0.00215616705827415, 'timestamp': '2025-09-10 02:31:52.763780', 'step': 5838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:52.792815', 'step': 5838, 'epoch': 3} {'type': 'loss', 'content': 0.0003829559136647731, 'timestamp': '2025-09-10 02:31:52.794775', 'step': 5839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.824006', 'step': 5839, 'epoch': 3} {'type': 'loss', 'content': 0.0007110520382411778, 'timestamp': '2025-09-10 02:31:52.847482', 'step': 5840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.877098', 'step': 5840, 'epoch': 3} {'type': 'loss', 'content': 0.005053516011685133, 'timestamp': '2025-09-10 02:31:52.879209', 'step': 5841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.908629', 'step': 5841, 'epoch': 3} {'type': 'loss', 'content': 0.0011301599442958832, 'timestamp': '2025-09-10 02:31:52.910385', 'step': 5842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:52.939694', 'step': 5842, 'epoch': 3} {'type': 'loss', 'content': 0.00040204497054219246, 'timestamp': '2025-09-10 02:31:52.941414', 'step': 5843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:52.971693', 'step': 5843, 'epoch': 3} {'type': 'loss', 'content': 0.0005023189005441964, 'timestamp': '2025-09-10 02:31:52.995426', 'step': 5844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.026918', 'step': 5844, 'epoch': 3} {'type': 'loss', 'content': 0.003944059368222952, 'timestamp': '2025-09-10 02:31:53.028776', 'step': 5845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.057768', 'step': 5845, 'epoch': 3} {'type': 'loss', 'content': 0.00814901851117611, 'timestamp': '2025-09-10 02:31:53.059975', 'step': 5846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.089313', 'step': 5846, 'epoch': 3} {'type': 'loss', 'content': 0.0005188480718061328, 'timestamp': '2025-09-10 02:31:53.093668', 'step': 5847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.123245', 'step': 5847, 'epoch': 3} {'type': 'loss', 'content': 0.0526927225291729, 'timestamp': '2025-09-10 02:31:53.146581', 'step': 5848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.178308', 'step': 5848, 'epoch': 3} {'type': 'loss', 'content': 0.003942835610359907, 'timestamp': '2025-09-10 02:31:53.180061', 'step': 5849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.210017', 'step': 5849, 'epoch': 3} {'type': 'loss', 'content': 0.0014322620118036866, 'timestamp': '2025-09-10 02:31:53.211834', 'step': 5850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:53.246306', 'step': 5850, 'epoch': 3} {'type': 'loss', 'content': 0.0004631231422536075, 'timestamp': '2025-09-10 02:31:53.248151', 'step': 5851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.279559', 'step': 5851, 'epoch': 3} {'type': 'loss', 'content': 0.0005005900748074055, 'timestamp': '2025-09-10 02:31:53.302877', 'step': 5852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.335605', 'step': 5852, 'epoch': 3} {'type': 'loss', 'content': 0.0005041599506512284, 'timestamp': '2025-09-10 02:31:53.337428', 'step': 5853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.367372', 'step': 5853, 'epoch': 3} {'type': 'loss', 'content': 0.001684217480942607, 'timestamp': '2025-09-10 02:31:53.373889', 'step': 5854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.412025', 'step': 5854, 'epoch': 3} {'type': 'loss', 'content': 0.0003901127784047276, 'timestamp': '2025-09-10 02:31:53.419643', 'step': 5855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.449706', 'step': 5855, 'epoch': 3} {'type': 'loss', 'content': 0.006597200874239206, 'timestamp': '2025-09-10 02:31:53.478440', 'step': 5856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.510928', 'step': 5856, 'epoch': 3} {'type': 'loss', 'content': 0.0013561425730586052, 'timestamp': '2025-09-10 02:31:53.514196', 'step': 5857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.546577', 'step': 5857, 'epoch': 3} {'type': 'loss', 'content': 0.0006679397192783654, 'timestamp': '2025-09-10 02:31:53.548432', 'step': 5858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.584326', 'step': 5858, 'epoch': 3} {'type': 'loss', 'content': 0.0003755086218006909, 'timestamp': '2025-09-10 02:31:53.586891', 'step': 5859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.618786', 'step': 5859, 'epoch': 3} {'type': 'loss', 'content': 0.00016341873561032116, 'timestamp': '2025-09-10 02:31:53.642278', 'step': 5860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.676004', 'step': 5860, 'epoch': 3} {'type': 'loss', 'content': 0.001248411601409316, 'timestamp': '2025-09-10 02:31:53.677849', 'step': 5861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.718426', 'step': 5861, 'epoch': 3} {'type': 'loss', 'content': 0.050770360976457596, 'timestamp': '2025-09-10 02:31:53.720181', 'step': 5862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:53.756187', 'step': 5862, 'epoch': 3} {'type': 'loss', 'content': 0.0010143638355657458, 'timestamp': '2025-09-10 02:31:53.758185', 'step': 5863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:53.796248', 'step': 5863, 'epoch': 3} {'type': 'loss', 'content': 0.000550613272935152, 'timestamp': '2025-09-10 02:31:53.820062', 'step': 5864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.848526', 'step': 5864, 'epoch': 3} {'type': 'loss', 'content': 0.0018723468529060483, 'timestamp': '2025-09-10 02:31:53.850540', 'step': 5865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.879570', 'step': 5865, 'epoch': 3} {'type': 'loss', 'content': 0.004078911151736975, 'timestamp': '2025-09-10 02:31:53.882249', 'step': 5866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.912604', 'step': 5866, 'epoch': 3} {'type': 'loss', 'content': 0.0042920708656311035, 'timestamp': '2025-09-10 02:31:53.914421', 'step': 5867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:53.944436', 'step': 5867, 'epoch': 3} {'type': 'loss', 'content': 0.0004184663703199476, 'timestamp': '2025-09-10 02:31:53.968016', 'step': 5868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:53.998054', 'step': 5868, 'epoch': 3} {'type': 'loss', 'content': 0.000920470745768398, 'timestamp': '2025-09-10 02:31:54.000266', 'step': 5869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.030164', 'step': 5869, 'epoch': 3} {'type': 'loss', 'content': 0.0045341490767896175, 'timestamp': '2025-09-10 02:31:54.032093', 'step': 5870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.060869', 'step': 5870, 'epoch': 3} {'type': 'loss', 'content': 0.014640615321695805, 'timestamp': '2025-09-10 02:31:54.063086', 'step': 5871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:54.092417', 'step': 5871, 'epoch': 3} {'type': 'loss', 'content': 0.000992514076642692, 'timestamp': '2025-09-10 02:31:54.115823', 'step': 5872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.145292', 'step': 5872, 'epoch': 3} {'type': 'loss', 'content': 0.00285705947317183, 'timestamp': '2025-09-10 02:31:54.147053', 'step': 5873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.176029', 'step': 5873, 'epoch': 3} {'type': 'loss', 'content': 0.0011258694576099515, 'timestamp': '2025-09-10 02:31:54.177631', 'step': 5874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.206389', 'step': 5874, 'epoch': 3} {'type': 'loss', 'content': 0.0012898036511614919, 'timestamp': '2025-09-10 02:31:54.208384', 'step': 5875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.237269', 'step': 5875, 'epoch': 3} {'type': 'loss', 'content': 0.002289626281708479, 'timestamp': '2025-09-10 02:31:54.260714', 'step': 5876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.289897', 'step': 5876, 'epoch': 3} {'type': 'loss', 'content': 0.0010736450785771012, 'timestamp': '2025-09-10 02:31:54.291830', 'step': 5877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.320492', 'step': 5877, 'epoch': 3} {'type': 'loss', 'content': 0.0017673444235697389, 'timestamp': '2025-09-10 02:31:54.322274', 'step': 5878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.352502', 'step': 5878, 'epoch': 3} {'type': 'loss', 'content': 0.0006449141656048596, 'timestamp': '2025-09-10 02:31:54.354429', 'step': 5879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.387157', 'step': 5879, 'epoch': 3} {'type': 'loss', 'content': 0.0019490603590384126, 'timestamp': '2025-09-10 02:31:54.410394', 'step': 5880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.439696', 'step': 5880, 'epoch': 3} {'type': 'loss', 'content': 0.03452775254845619, 'timestamp': '2025-09-10 02:31:54.441554', 'step': 5881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.470566', 'step': 5881, 'epoch': 3} {'type': 'loss', 'content': 0.0015733633190393448, 'timestamp': '2025-09-10 02:31:54.472185', 'step': 5882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.500917', 'step': 5882, 'epoch': 3} {'type': 'loss', 'content': 0.0012607196113094687, 'timestamp': '2025-09-10 02:31:54.503111', 'step': 5883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.532362', 'step': 5883, 'epoch': 3} {'type': 'loss', 'content': 0.015937503427267075, 'timestamp': '2025-09-10 02:31:54.556031', 'step': 5884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.586255', 'step': 5884, 'epoch': 3} {'type': 'loss', 'content': 0.0011639997828751802, 'timestamp': '2025-09-10 02:31:54.587987', 'step': 5885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:54.617817', 'step': 5885, 'epoch': 3} {'type': 'loss', 'content': 0.0006883523310534656, 'timestamp': '2025-09-10 02:31:54.619596', 'step': 5886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.648748', 'step': 5886, 'epoch': 3} {'type': 'loss', 'content': 0.0019631306640803814, 'timestamp': '2025-09-10 02:31:54.650583', 'step': 5887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.679866', 'step': 5887, 'epoch': 3} {'type': 'loss', 'content': 0.02031813934445381, 'timestamp': '2025-09-10 02:31:54.703359', 'step': 5888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:54.733765', 'step': 5888, 'epoch': 3} {'type': 'loss', 'content': 0.02791324444115162, 'timestamp': '2025-09-10 02:31:54.735594', 'step': 5889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.764386', 'step': 5889, 'epoch': 3} {'type': 'loss', 'content': 0.0005248067318461835, 'timestamp': '2025-09-10 02:31:54.765995', 'step': 5890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.794937', 'step': 5890, 'epoch': 3} {'type': 'loss', 'content': 0.0004645258595701307, 'timestamp': '2025-09-10 02:31:54.796544', 'step': 5891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:54.825292', 'step': 5891, 'epoch': 3} {'type': 'loss', 'content': 0.0017598795238882303, 'timestamp': '2025-09-10 02:31:54.848731', 'step': 5892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.879158', 'step': 5892, 'epoch': 3} {'type': 'loss', 'content': 0.0008864394039846957, 'timestamp': '2025-09-10 02:31:54.880835', 'step': 5893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.909438', 'step': 5893, 'epoch': 3} {'type': 'loss', 'content': 0.00020339501497801393, 'timestamp': '2025-09-10 02:31:54.911332', 'step': 5894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.940462', 'step': 5894, 'epoch': 3} {'type': 'loss', 'content': 0.024470051750540733, 'timestamp': '2025-09-10 02:31:54.942378', 'step': 5895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:54.971771', 'step': 5895, 'epoch': 3} {'type': 'loss', 'content': 0.0006051209638826549, 'timestamp': '2025-09-10 02:31:54.995111', 'step': 5896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.024545', 'step': 5896, 'epoch': 3} {'type': 'loss', 'content': 0.0007404032512567937, 'timestamp': '2025-09-10 02:31:55.026184', 'step': 5897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.055872', 'step': 5897, 'epoch': 3} {'type': 'loss', 'content': 0.00032552939956076443, 'timestamp': '2025-09-10 02:31:55.057797', 'step': 5898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:55.087012', 'step': 5898, 'epoch': 3} {'type': 'loss', 'content': 0.003726254915818572, 'timestamp': '2025-09-10 02:31:55.088908', 'step': 5899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.117630', 'step': 5899, 'epoch': 3} {'type': 'loss', 'content': 0.0005625720368698239, 'timestamp': '2025-09-10 02:31:55.140716', 'step': 5900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.169838', 'step': 5900, 'epoch': 3} {'type': 'loss', 'content': 0.0009212298900820315, 'timestamp': '2025-09-10 02:31:55.171602', 'step': 5901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.201818', 'step': 5901, 'epoch': 3} {'type': 'loss', 'content': 0.0010035239392891526, 'timestamp': '2025-09-10 02:31:55.203429', 'step': 5902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.236570', 'step': 5902, 'epoch': 3} {'type': 'loss', 'content': 0.0016566301928833127, 'timestamp': '2025-09-10 02:31:55.238521', 'step': 5903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.270811', 'step': 5903, 'epoch': 3} {'type': 'loss', 'content': 0.00018501277372706681, 'timestamp': '2025-09-10 02:31:55.294073', 'step': 5904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:55.323027', 'step': 5904, 'epoch': 3} {'type': 'loss', 'content': 0.0008375718025490642, 'timestamp': '2025-09-10 02:31:55.325336', 'step': 5905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.356208', 'step': 5905, 'epoch': 3} {'type': 'loss', 'content': 0.00016759525169618428, 'timestamp': '2025-09-10 02:31:55.358284', 'step': 5906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.389930', 'step': 5906, 'epoch': 3} {'type': 'loss', 'content': 0.006352846045047045, 'timestamp': '2025-09-10 02:31:55.391800', 'step': 5907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.425032', 'step': 5907, 'epoch': 3} {'type': 'loss', 'content': 0.0020307323429733515, 'timestamp': '2025-09-10 02:31:55.448322', 'step': 5908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.483577', 'step': 5908, 'epoch': 3} {'type': 'loss', 'content': 0.00038759110611863434, 'timestamp': '2025-09-10 02:31:55.485277', 'step': 5909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:55.517977', 'step': 5909, 'epoch': 3} {'type': 'loss', 'content': 0.0009370900806970894, 'timestamp': '2025-09-10 02:31:55.519595', 'step': 5910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.553212', 'step': 5910, 'epoch': 3} {'type': 'loss', 'content': 0.0011055589420720935, 'timestamp': '2025-09-10 02:31:55.555168', 'step': 5911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.590823', 'step': 5911, 'epoch': 3} {'type': 'loss', 'content': 0.0025008826050907373, 'timestamp': '2025-09-10 02:31:55.614135', 'step': 5912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.645030', 'step': 5912, 'epoch': 3} {'type': 'loss', 'content': 0.0004107403219677508, 'timestamp': '2025-09-10 02:31:55.647052', 'step': 5913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:55.683495', 'step': 5913, 'epoch': 3} {'type': 'loss', 'content': 0.006134289316833019, 'timestamp': '2025-09-10 02:31:55.685236', 'step': 5914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.724471', 'step': 5914, 'epoch': 3} {'type': 'loss', 'content': 0.00103579752612859, 'timestamp': '2025-09-10 02:31:55.726673', 'step': 5915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.761720', 'step': 5915, 'epoch': 3} {'type': 'loss', 'content': 0.006375829689204693, 'timestamp': '2025-09-10 02:31:55.785049', 'step': 5916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.823156', 'step': 5916, 'epoch': 3} {'type': 'loss', 'content': 0.00024464138550683856, 'timestamp': '2025-09-10 02:31:55.828580', 'step': 5917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.861859', 'step': 5917, 'epoch': 3} {'type': 'loss', 'content': 0.0005653674597851932, 'timestamp': '2025-09-10 02:31:55.863555', 'step': 5918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.892613', 'step': 5918, 'epoch': 3} {'type': 'loss', 'content': 0.013253341428935528, 'timestamp': '2025-09-10 02:31:55.894386', 'step': 5919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.923208', 'step': 5919, 'epoch': 3} {'type': 'loss', 'content': 0.0002616856654640287, 'timestamp': '2025-09-10 02:31:55.946392', 'step': 5920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:55.975194', 'step': 5920, 'epoch': 3} {'type': 'loss', 'content': 0.005493378732353449, 'timestamp': '2025-09-10 02:31:55.977009', 'step': 5921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:56.005628', 'step': 5921, 'epoch': 3} {'type': 'loss', 'content': 0.0023588540498167276, 'timestamp': '2025-09-10 02:31:56.007529', 'step': 5922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:56.036425', 'step': 5922, 'epoch': 3} {'type': 'loss', 'content': 0.0014897006331011653, 'timestamp': '2025-09-10 02:31:56.038208', 'step': 5923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:56.067038', 'step': 5923, 'epoch': 3} {'type': 'loss', 'content': 0.0002015876380028203, 'timestamp': '2025-09-10 02:31:56.090331', 'step': 5924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:56.124105', 'step': 5924, 'epoch': 3} {'type': 'loss', 'content': 0.0007182031986303627, 'timestamp': '2025-09-10 02:31:56.126203', 'step': 5925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:56.161037', 'step': 5925, 'epoch': 3} {'type': 'loss', 'content': 0.00025931946584023535, 'timestamp': '2025-09-10 02:31:56.162991', 'step': 5926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:56.197824', 'step': 5926, 'epoch': 3} {'type': 'loss', 'content': 0.0275877732783556, 'timestamp': '2025-09-10 02:31:56.202446', 'step': 5927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:56.234655', 'step': 5927, 'epoch': 3} {'type': 'loss', 'content': 0.001264668651856482, 'timestamp': '2025-09-10 02:31:56.258378', 'step': 5928, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:31:58.184501', 'step': 5928, 'epoch': 3} {'type': 'pplx', 'content': 2466542.7706270437, 'timestamp': '2025-09-10 02:31:58.186501', 'step': 5928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.214004', 'step': 5928, 'epoch': 3} {'type': 'loss', 'content': 0.002576331840828061, 'timestamp': '2025-09-10 02:31:58.215811', 'step': 5929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.245130', 'step': 5929, 'epoch': 3} {'type': 'loss', 'content': 0.00018019216076936573, 'timestamp': '2025-09-10 02:31:58.246934', 'step': 5930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.276054', 'step': 5930, 'epoch': 3} {'type': 'loss', 'content': 0.00048270850675180554, 'timestamp': '2025-09-10 02:31:58.277925', 'step': 5931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.307356', 'step': 5931, 'epoch': 3} {'type': 'loss', 'content': 0.03198447450995445, 'timestamp': '2025-09-10 02:31:58.330665', 'step': 5932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.359832', 'step': 5932, 'epoch': 3} {'type': 'loss', 'content': 0.00030957843409851193, 'timestamp': '2025-09-10 02:31:58.361462', 'step': 5933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.390590', 'step': 5933, 'epoch': 3} {'type': 'loss', 'content': 0.032051801681518555, 'timestamp': '2025-09-10 02:31:58.392136', 'step': 5934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.420940', 'step': 5934, 'epoch': 3} {'type': 'loss', 'content': 0.006861783564090729, 'timestamp': '2025-09-10 02:31:58.422710', 'step': 5935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.451271', 'step': 5935, 'epoch': 3} {'type': 'loss', 'content': 0.00024682050570845604, 'timestamp': '2025-09-10 02:31:58.474823', 'step': 5936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.507240', 'step': 5936, 'epoch': 3} {'type': 'loss', 'content': 0.01753365434706211, 'timestamp': '2025-09-10 02:31:58.509197', 'step': 5937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.538031', 'step': 5937, 'epoch': 3} {'type': 'loss', 'content': 0.0010043384972959757, 'timestamp': '2025-09-10 02:31:58.539864', 'step': 5938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.568838', 'step': 5938, 'epoch': 3} {'type': 'loss', 'content': 0.002193879336118698, 'timestamp': '2025-09-10 02:31:58.570550', 'step': 5939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.599443', 'step': 5939, 'epoch': 3} {'type': 'loss', 'content': 0.00029714597621932626, 'timestamp': '2025-09-10 02:31:58.622606', 'step': 5940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.651623', 'step': 5940, 'epoch': 3} {'type': 'loss', 'content': 0.00016494235023856163, 'timestamp': '2025-09-10 02:31:58.653285', 'step': 5941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.682018', 'step': 5941, 'epoch': 3} {'type': 'loss', 'content': 0.00010710857168305665, 'timestamp': '2025-09-10 02:31:58.683825', 'step': 5942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:58.712386', 'step': 5942, 'epoch': 3} {'type': 'loss', 'content': 0.013505185022950172, 'timestamp': '2025-09-10 02:31:58.714249', 'step': 5943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.743289', 'step': 5943, 'epoch': 3} {'type': 'loss', 'content': 0.0005287728854455054, 'timestamp': '2025-09-10 02:31:58.766354', 'step': 5944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.795067', 'step': 5944, 'epoch': 3} {'type': 'loss', 'content': 0.0002007946459343657, 'timestamp': '2025-09-10 02:31:58.797069', 'step': 5945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.825866', 'step': 5945, 'epoch': 3} {'type': 'loss', 'content': 0.001181377680040896, 'timestamp': '2025-09-10 02:31:58.827841', 'step': 5946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.857665', 'step': 5946, 'epoch': 3} {'type': 'loss', 'content': 0.0009074655245058239, 'timestamp': '2025-09-10 02:31:58.859165', 'step': 5947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.888086', 'step': 5947, 'epoch': 3} {'type': 'loss', 'content': 0.0500468946993351, 'timestamp': '2025-09-10 02:31:58.911298', 'step': 5948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.940518', 'step': 5948, 'epoch': 3} {'type': 'loss', 'content': 0.00016007278463803232, 'timestamp': '2025-09-10 02:31:58.942217', 'step': 5949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:58.971482', 'step': 5949, 'epoch': 3} {'type': 'loss', 'content': 0.0016039551701396704, 'timestamp': '2025-09-10 02:31:58.973493', 'step': 5950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:31:59.002691', 'step': 5950, 'epoch': 3} {'type': 'loss', 'content': 0.005725264549255371, 'timestamp': '2025-09-10 02:31:59.004647', 'step': 5951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.033503', 'step': 5951, 'epoch': 3} {'type': 'loss', 'content': 0.010027670301496983, 'timestamp': '2025-09-10 02:31:59.056938', 'step': 5952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.100968', 'step': 5952, 'epoch': 3} {'type': 'loss', 'content': 0.0019840996246784925, 'timestamp': '2025-09-10 02:31:59.102882', 'step': 5953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.131539', 'step': 5953, 'epoch': 3} {'type': 'loss', 'content': 0.004044392611831427, 'timestamp': '2025-09-10 02:31:59.133287', 'step': 5954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.162571', 'step': 5954, 'epoch': 3} {'type': 'loss', 'content': 0.00650023901835084, 'timestamp': '2025-09-10 02:31:59.164476', 'step': 5955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.195932', 'step': 5955, 'epoch': 3} {'type': 'loss', 'content': 0.0003615424211602658, 'timestamp': '2025-09-10 02:31:59.223779', 'step': 5956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.256305', 'step': 5956, 'epoch': 3} {'type': 'loss', 'content': 0.028546255081892014, 'timestamp': '2025-09-10 02:31:59.257999', 'step': 5957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:59.299945', 'step': 5957, 'epoch': 3} {'type': 'loss', 'content': 0.011445007286965847, 'timestamp': '2025-09-10 02:31:59.301678', 'step': 5958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.331151', 'step': 5958, 'epoch': 3} {'type': 'loss', 'content': 0.0012820158153772354, 'timestamp': '2025-09-10 02:31:59.332913', 'step': 5959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.361600', 'step': 5959, 'epoch': 3} {'type': 'loss', 'content': 0.0003934795968234539, 'timestamp': '2025-09-10 02:31:59.385022', 'step': 5960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.417678', 'step': 5960, 'epoch': 3} {'type': 'loss', 'content': 0.00023115640215110034, 'timestamp': '2025-09-10 02:31:59.419492', 'step': 5961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.450992', 'step': 5961, 'epoch': 3} {'type': 'loss', 'content': 0.0009675567853264511, 'timestamp': '2025-09-10 02:31:59.452970', 'step': 5962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:59.488314', 'step': 5962, 'epoch': 3} {'type': 'loss', 'content': 0.0015642615035176277, 'timestamp': '2025-09-10 02:31:59.492027', 'step': 5963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.529088', 'step': 5963, 'epoch': 3} {'type': 'loss', 'content': 0.0010022606002166867, 'timestamp': '2025-09-10 02:31:59.552545', 'step': 5964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.592894', 'step': 5964, 'epoch': 3} {'type': 'loss', 'content': 0.00016989861615002155, 'timestamp': '2025-09-10 02:31:59.594853', 'step': 5965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.627923', 'step': 5965, 'epoch': 3} {'type': 'loss', 'content': 0.0013952319277450442, 'timestamp': '2025-09-10 02:31:59.629846', 'step': 5966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.666271', 'step': 5966, 'epoch': 3} {'type': 'loss', 'content': 0.006576180923730135, 'timestamp': '2025-09-10 02:31:59.668072', 'step': 5967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.708126', 'step': 5967, 'epoch': 3} {'type': 'loss', 'content': 0.003177030710503459, 'timestamp': '2025-09-10 02:31:59.731549', 'step': 5968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.773320', 'step': 5968, 'epoch': 3} {'type': 'loss', 'content': 0.0003268076397944242, 'timestamp': '2025-09-10 02:31:59.775306', 'step': 5969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:31:59.814971', 'step': 5969, 'epoch': 3} {'type': 'loss', 'content': 0.001026048674248159, 'timestamp': '2025-09-10 02:31:59.816842', 'step': 5970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.846495', 'step': 5970, 'epoch': 3} {'type': 'loss', 'content': 0.0011224242625758052, 'timestamp': '2025-09-10 02:31:59.848384', 'step': 5971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.879190', 'step': 5971, 'epoch': 3} {'type': 'loss', 'content': 0.0065633621998131275, 'timestamp': '2025-09-10 02:31:59.902812', 'step': 5972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.934696', 'step': 5972, 'epoch': 3} {'type': 'loss', 'content': 0.00255127833224833, 'timestamp': '2025-09-10 02:31:59.936480', 'step': 5973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.965207', 'step': 5973, 'epoch': 3} {'type': 'loss', 'content': 0.004116647876799107, 'timestamp': '2025-09-10 02:31:59.967068', 'step': 5974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:31:59.995787', 'step': 5974, 'epoch': 3} {'type': 'loss', 'content': 0.00024878952535800636, 'timestamp': '2025-09-10 02:31:59.997826', 'step': 5975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:00.026627', 'step': 5975, 'epoch': 3} {'type': 'loss', 'content': 0.001366226002573967, 'timestamp': '2025-09-10 02:32:00.049967', 'step': 5976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:00.080349', 'step': 5976, 'epoch': 3} {'type': 'loss', 'content': 0.0002822635869961232, 'timestamp': '2025-09-10 02:32:00.082394', 'step': 5977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:00.111382', 'step': 5977, 'epoch': 3} {'type': 'loss', 'content': 0.0005551199428737164, 'timestamp': '2025-09-10 02:32:00.113076', 'step': 5978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.142619', 'step': 5978, 'epoch': 3} {'type': 'loss', 'content': 0.0003631310537457466, 'timestamp': '2025-09-10 02:32:00.144683', 'step': 5979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:00.174555', 'step': 5979, 'epoch': 3} {'type': 'loss', 'content': 0.00014048561570234597, 'timestamp': '2025-09-10 02:32:00.198000', 'step': 5980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.227229', 'step': 5980, 'epoch': 3} {'type': 'loss', 'content': 0.00020421433146111667, 'timestamp': '2025-09-10 02:32:00.229054', 'step': 5981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.257730', 'step': 5981, 'epoch': 3} {'type': 'loss', 'content': 0.0002952653740067035, 'timestamp': '2025-09-10 02:32:00.259577', 'step': 5982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.288274', 'step': 5982, 'epoch': 3} {'type': 'loss', 'content': 0.004466590005904436, 'timestamp': '2025-09-10 02:32:00.290089', 'step': 5983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.319073', 'step': 5983, 'epoch': 3} {'type': 'loss', 'content': 0.00020575194503180683, 'timestamp': '2025-09-10 02:32:00.342312', 'step': 5984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.371529', 'step': 5984, 'epoch': 3} {'type': 'loss', 'content': 0.00958172231912613, 'timestamp': '2025-09-10 02:32:00.373424', 'step': 5985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.403076', 'step': 5985, 'epoch': 3} {'type': 'loss', 'content': 0.00015885476022958755, 'timestamp': '2025-09-10 02:32:00.404881', 'step': 5986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.433719', 'step': 5986, 'epoch': 3} {'type': 'loss', 'content': 0.0006440441939048469, 'timestamp': '2025-09-10 02:32:00.435877', 'step': 5987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.464931', 'step': 5987, 'epoch': 3} {'type': 'loss', 'content': 0.00075487419962883, 'timestamp': '2025-09-10 02:32:00.488267', 'step': 5988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.516934', 'step': 5988, 'epoch': 3} {'type': 'loss', 'content': 0.0002352559968130663, 'timestamp': '2025-09-10 02:32:00.518692', 'step': 5989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.547333', 'step': 5989, 'epoch': 3} {'type': 'loss', 'content': 0.00017876646597869694, 'timestamp': '2025-09-10 02:32:00.549207', 'step': 5990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:00.578010', 'step': 5990, 'epoch': 3} {'type': 'loss', 'content': 0.0005139493150636554, 'timestamp': '2025-09-10 02:32:00.579764', 'step': 5991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.608622', 'step': 5991, 'epoch': 3} {'type': 'loss', 'content': 0.0018390259938314557, 'timestamp': '2025-09-10 02:32:00.631911', 'step': 5992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.660834', 'step': 5992, 'epoch': 3} {'type': 'loss', 'content': 0.0001986074639717117, 'timestamp': '2025-09-10 02:32:00.662890', 'step': 5993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.692434', 'step': 5993, 'epoch': 3} {'type': 'loss', 'content': 0.0002618823782540858, 'timestamp': '2025-09-10 02:32:00.694355', 'step': 5994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.724449', 'step': 5994, 'epoch': 3} {'type': 'loss', 'content': 0.00036912746145389974, 'timestamp': '2025-09-10 02:32:00.726562', 'step': 5995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.755651', 'step': 5995, 'epoch': 3} {'type': 'loss', 'content': 0.0005986754549667239, 'timestamp': '2025-09-10 02:32:00.779150', 'step': 5996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:00.808677', 'step': 5996, 'epoch': 3} {'type': 'loss', 'content': 0.0009047608473338187, 'timestamp': '2025-09-10 02:32:00.810347', 'step': 5997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.839162', 'step': 5997, 'epoch': 3} {'type': 'loss', 'content': 0.000149157625855878, 'timestamp': '2025-09-10 02:32:00.840915', 'step': 5998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.869577', 'step': 5998, 'epoch': 3} {'type': 'loss', 'content': 0.00014819527859799564, 'timestamp': '2025-09-10 02:32:00.871249', 'step': 5999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:00.900053', 'step': 5999, 'epoch': 3} {'type': 'loss', 'content': 0.0021587680093944073, 'timestamp': '2025-09-10 02:32:00.923086', 'step': 6000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-10 02:32:05.795377', 'step': 6000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.838231', 'step': 6000, 'epoch': 3} {'type': 'loss', 'content': 0.0004505121323745698, 'timestamp': '2025-09-10 02:32:05.840064', 'step': 6001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:05.869305', 'step': 6001, 'epoch': 3} {'type': 'loss', 'content': 0.000520777830388397, 'timestamp': '2025-09-10 02:32:05.871307', 'step': 6002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.900587', 'step': 6002, 'epoch': 3} {'type': 'loss', 'content': 0.00018003354489337653, 'timestamp': '2025-09-10 02:32:05.902259', 'step': 6003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.931803', 'step': 6003, 'epoch': 3} {'type': 'loss', 'content': 8.050748147070408e-05, 'timestamp': '2025-09-10 02:32:05.955549', 'step': 6004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:05.984504', 'step': 6004, 'epoch': 3} {'type': 'loss', 'content': 8.989882189780474e-05, 'timestamp': '2025-09-10 02:32:05.986420', 'step': 6005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.015704', 'step': 6005, 'epoch': 3} {'type': 'loss', 'content': 0.00030490991775877774, 'timestamp': '2025-09-10 02:32:06.017549', 'step': 6006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.046561', 'step': 6006, 'epoch': 3} {'type': 'loss', 'content': 0.001373602426610887, 'timestamp': '2025-09-10 02:32:06.048419', 'step': 6007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.078898', 'step': 6007, 'epoch': 3} {'type': 'loss', 'content': 0.001475251279771328, 'timestamp': '2025-09-10 02:32:06.102602', 'step': 6008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:06.132082', 'step': 6008, 'epoch': 3} {'type': 'loss', 'content': 0.00022059862385503948, 'timestamp': '2025-09-10 02:32:06.134168', 'step': 6009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.163639', 'step': 6009, 'epoch': 3} {'type': 'loss', 'content': 0.00019312952645123005, 'timestamp': '2025-09-10 02:32:06.165340', 'step': 6010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.194330', 'step': 6010, 'epoch': 3} {'type': 'loss', 'content': 0.0001684818707872182, 'timestamp': '2025-09-10 02:32:06.196276', 'step': 6011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.225644', 'step': 6011, 'epoch': 3} {'type': 'loss', 'content': 0.021128688007593155, 'timestamp': '2025-09-10 02:32:06.249149', 'step': 6012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.278088', 'step': 6012, 'epoch': 3} {'type': 'loss', 'content': 0.017196234315633774, 'timestamp': '2025-09-10 02:32:06.280130', 'step': 6013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.308736', 'step': 6013, 'epoch': 3} {'type': 'loss', 'content': 0.003722748253494501, 'timestamp': '2025-09-10 02:32:06.310639', 'step': 6014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.339383', 'step': 6014, 'epoch': 3} {'type': 'loss', 'content': 0.00491055753082037, 'timestamp': '2025-09-10 02:32:06.341137', 'step': 6015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.370304', 'step': 6015, 'epoch': 3} {'type': 'loss', 'content': 0.00023628502094652504, 'timestamp': '2025-09-10 02:32:06.393682', 'step': 6016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.422606', 'step': 6016, 'epoch': 3} {'type': 'loss', 'content': 0.0003611140127759427, 'timestamp': '2025-09-10 02:32:06.424446', 'step': 6017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.453356', 'step': 6017, 'epoch': 3} {'type': 'loss', 'content': 0.0001540749944979325, 'timestamp': '2025-09-10 02:32:06.455102', 'step': 6018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.484665', 'step': 6018, 'epoch': 3} {'type': 'loss', 'content': 0.01930735446512699, 'timestamp': '2025-09-10 02:32:06.486472', 'step': 6019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.515802', 'step': 6019, 'epoch': 3} {'type': 'loss', 'content': 0.0002698621538002044, 'timestamp': '2025-09-10 02:32:06.539299', 'step': 6020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.568267', 'step': 6020, 'epoch': 3} {'type': 'loss', 'content': 0.0005506637971848249, 'timestamp': '2025-09-10 02:32:06.570320', 'step': 6021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.599465', 'step': 6021, 'epoch': 3} {'type': 'loss', 'content': 0.0008389264112338424, 'timestamp': '2025-09-10 02:32:06.601342', 'step': 6022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.630001', 'step': 6022, 'epoch': 3} {'type': 'loss', 'content': 0.0007258251425810158, 'timestamp': '2025-09-10 02:32:06.632009', 'step': 6023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.660891', 'step': 6023, 'epoch': 3} {'type': 'loss', 'content': 0.001435625716112554, 'timestamp': '2025-09-10 02:32:06.684351', 'step': 6024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.713960', 'step': 6024, 'epoch': 3} {'type': 'loss', 'content': 0.00012985989451408386, 'timestamp': '2025-09-10 02:32:06.715693', 'step': 6025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.744482', 'step': 6025, 'epoch': 3} {'type': 'loss', 'content': 0.0029376517049968243, 'timestamp': '2025-09-10 02:32:06.746454', 'step': 6026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.775262', 'step': 6026, 'epoch': 3} {'type': 'loss', 'content': 0.035706792026758194, 'timestamp': '2025-09-10 02:32:06.777117', 'step': 6027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.805919', 'step': 6027, 'epoch': 3} {'type': 'loss', 'content': 0.0008659813902340829, 'timestamp': '2025-09-10 02:32:06.829279', 'step': 6028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.859088', 'step': 6028, 'epoch': 3} {'type': 'loss', 'content': 8.571484795538709e-05, 'timestamp': '2025-09-10 02:32:06.860851', 'step': 6029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:06.889448', 'step': 6029, 'epoch': 3} {'type': 'loss', 'content': 0.030303040519356728, 'timestamp': '2025-09-10 02:32:06.891379', 'step': 6030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:06.920440', 'step': 6030, 'epoch': 3} {'type': 'loss', 'content': 0.0005027134902775288, 'timestamp': '2025-09-10 02:32:06.922060', 'step': 6031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:06.950882', 'step': 6031, 'epoch': 3} {'type': 'loss', 'content': 0.00012944061018060893, 'timestamp': '2025-09-10 02:32:06.974288', 'step': 6032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.003277', 'step': 6032, 'epoch': 3} {'type': 'loss', 'content': 0.05250001698732376, 'timestamp': '2025-09-10 02:32:07.005201', 'step': 6033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.034322', 'step': 6033, 'epoch': 3} {'type': 'loss', 'content': 0.00019380975572858006, 'timestamp': '2025-09-10 02:32:07.036135', 'step': 6034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.064912', 'step': 6034, 'epoch': 3} {'type': 'loss', 'content': 0.0017998889088630676, 'timestamp': '2025-09-10 02:32:07.066535', 'step': 6035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.095593', 'step': 6035, 'epoch': 3} {'type': 'loss', 'content': 0.0006400958518497646, 'timestamp': '2025-09-10 02:32:07.119021', 'step': 6036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:07.148221', 'step': 6036, 'epoch': 3} {'type': 'loss', 'content': 0.0007847324013710022, 'timestamp': '2025-09-10 02:32:07.150461', 'step': 6037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.180865', 'step': 6037, 'epoch': 3} {'type': 'loss', 'content': 0.0006318576051853597, 'timestamp': '2025-09-10 02:32:07.187546', 'step': 6038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:07.221553', 'step': 6038, 'epoch': 3} {'type': 'loss', 'content': 0.0011142324656248093, 'timestamp': '2025-09-10 02:32:07.225696', 'step': 6039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.279880', 'step': 6039, 'epoch': 3} {'type': 'loss', 'content': 0.0009587918757461011, 'timestamp': '2025-09-10 02:32:07.306949', 'step': 6040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:07.352422', 'step': 6040, 'epoch': 3} {'type': 'loss', 'content': 0.0011987646576017141, 'timestamp': '2025-09-10 02:32:07.356975', 'step': 6041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.398357', 'step': 6041, 'epoch': 3} {'type': 'loss', 'content': 0.00021913684031460434, 'timestamp': '2025-09-10 02:32:07.400289', 'step': 6042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.434739', 'step': 6042, 'epoch': 3} {'type': 'loss', 'content': 0.0005295642768032849, 'timestamp': '2025-09-10 02:32:07.436606', 'step': 6043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.468585', 'step': 6043, 'epoch': 3} {'type': 'loss', 'content': 0.023086709901690483, 'timestamp': '2025-09-10 02:32:07.491873', 'step': 6044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.525746', 'step': 6044, 'epoch': 3} {'type': 'loss', 'content': 0.00017024595581460744, 'timestamp': '2025-09-10 02:32:07.527332', 'step': 6045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.559610', 'step': 6045, 'epoch': 3} {'type': 'loss', 'content': 0.00031210650922730565, 'timestamp': '2025-09-10 02:32:07.561309', 'step': 6046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.595024', 'step': 6046, 'epoch': 3} {'type': 'loss', 'content': 0.00016917231550905854, 'timestamp': '2025-09-10 02:32:07.596887', 'step': 6047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.627519', 'step': 6047, 'epoch': 3} {'type': 'loss', 'content': 0.0007761456654407084, 'timestamp': '2025-09-10 02:32:07.650866', 'step': 6048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.687084', 'step': 6048, 'epoch': 3} {'type': 'loss', 'content': 0.0005857849610038102, 'timestamp': '2025-09-10 02:32:07.688908', 'step': 6049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:07.727869', 'step': 6049, 'epoch': 3} {'type': 'loss', 'content': 0.0005356850451789796, 'timestamp': '2025-09-10 02:32:07.729880', 'step': 6050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:07.766812', 'step': 6050, 'epoch': 3} {'type': 'loss', 'content': 0.00037692865589633584, 'timestamp': '2025-09-10 02:32:07.768772', 'step': 6051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.806412', 'step': 6051, 'epoch': 3} {'type': 'loss', 'content': 0.0003155084268655628, 'timestamp': '2025-09-10 02:32:07.829888', 'step': 6052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.858316', 'step': 6052, 'epoch': 3} {'type': 'loss', 'content': 0.0009529165108688176, 'timestamp': '2025-09-10 02:32:07.860454', 'step': 6053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.889094', 'step': 6053, 'epoch': 3} {'type': 'loss', 'content': 0.016368014737963676, 'timestamp': '2025-09-10 02:32:07.890767', 'step': 6054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.919548', 'step': 6054, 'epoch': 3} {'type': 'loss', 'content': 0.0017024686094373465, 'timestamp': '2025-09-10 02:32:07.921541', 'step': 6055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:07.950547', 'step': 6055, 'epoch': 3} {'type': 'loss', 'content': 0.05321263521909714, 'timestamp': '2025-09-10 02:32:07.973936', 'step': 6056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.002769', 'step': 6056, 'epoch': 3} {'type': 'loss', 'content': 0.013266685418784618, 'timestamp': '2025-09-10 02:32:08.004663', 'step': 6057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.033701', 'step': 6057, 'epoch': 3} {'type': 'loss', 'content': 0.0016620157985016704, 'timestamp': '2025-09-10 02:32:08.035567', 'step': 6058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.064407', 'step': 6058, 'epoch': 3} {'type': 'loss', 'content': 0.00020325240620877594, 'timestamp': '2025-09-10 02:32:08.066196', 'step': 6059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.095052', 'step': 6059, 'epoch': 3} {'type': 'loss', 'content': 0.0015133811393752694, 'timestamp': '2025-09-10 02:32:08.118275', 'step': 6060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.147581', 'step': 6060, 'epoch': 3} {'type': 'loss', 'content': 0.00016656095976941288, 'timestamp': '2025-09-10 02:32:08.149624', 'step': 6061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.179411', 'step': 6061, 'epoch': 3} {'type': 'loss', 'content': 0.0001438201143173501, 'timestamp': '2025-09-10 02:32:08.181391', 'step': 6062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.210419', 'step': 6062, 'epoch': 3} {'type': 'loss', 'content': 0.0006453626556321979, 'timestamp': '2025-09-10 02:32:08.212326', 'step': 6063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.241145', 'step': 6063, 'epoch': 3} {'type': 'loss', 'content': 0.00026031830930151045, 'timestamp': '2025-09-10 02:32:08.264551', 'step': 6064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.293793', 'step': 6064, 'epoch': 3} {'type': 'loss', 'content': 0.004452974535524845, 'timestamp': '2025-09-10 02:32:08.295904', 'step': 6065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:08.325015', 'step': 6065, 'epoch': 3} {'type': 'loss', 'content': 0.0009710406302474439, 'timestamp': '2025-09-10 02:32:08.326915', 'step': 6066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:08.356192', 'step': 6066, 'epoch': 3} {'type': 'loss', 'content': 0.00012414071534294635, 'timestamp': '2025-09-10 02:32:08.357959', 'step': 6067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.386624', 'step': 6067, 'epoch': 3} {'type': 'loss', 'content': 0.02108047343790531, 'timestamp': '2025-09-10 02:32:08.410156', 'step': 6068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.439257', 'step': 6068, 'epoch': 3} {'type': 'loss', 'content': 0.0010208667954429984, 'timestamp': '2025-09-10 02:32:08.441119', 'step': 6069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:08.469891', 'step': 6069, 'epoch': 3} {'type': 'loss', 'content': 0.0005744010559283197, 'timestamp': '2025-09-10 02:32:08.471673', 'step': 6070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.500236', 'step': 6070, 'epoch': 3} {'type': 'loss', 'content': 0.0001902828662423417, 'timestamp': '2025-09-10 02:32:08.502083', 'step': 6071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.531175', 'step': 6071, 'epoch': 3} {'type': 'loss', 'content': 0.0005153705715201795, 'timestamp': '2025-09-10 02:32:08.554663', 'step': 6072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.586075', 'step': 6072, 'epoch': 3} {'type': 'loss', 'content': 0.00047756050480529666, 'timestamp': '2025-09-10 02:32:08.587878', 'step': 6073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.617095', 'step': 6073, 'epoch': 3} {'type': 'loss', 'content': 0.0004439638287294656, 'timestamp': '2025-09-10 02:32:08.619091', 'step': 6074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.647926', 'step': 6074, 'epoch': 3} {'type': 'loss', 'content': 0.009202768094837666, 'timestamp': '2025-09-10 02:32:08.649672', 'step': 6075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.679044', 'step': 6075, 'epoch': 3} {'type': 'loss', 'content': 0.001354446285404265, 'timestamp': '2025-09-10 02:32:08.702500', 'step': 6076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.731702', 'step': 6076, 'epoch': 3} {'type': 'loss', 'content': 0.004026635084301233, 'timestamp': '2025-09-10 02:32:08.733674', 'step': 6077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.762480', 'step': 6077, 'epoch': 3} {'type': 'loss', 'content': 0.0003100039029959589, 'timestamp': '2025-09-10 02:32:08.764300', 'step': 6078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:08.793548', 'step': 6078, 'epoch': 3} {'type': 'loss', 'content': 0.00024007308820728213, 'timestamp': '2025-09-10 02:32:08.795628', 'step': 6079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:08.824959', 'step': 6079, 'epoch': 3} {'type': 'loss', 'content': 0.0005758529296144843, 'timestamp': '2025-09-10 02:32:08.848332', 'step': 6080, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:32:10.722930', 'step': 6080, 'epoch': 3} {'type': 'pplx', 'content': 2645955.2737260633, 'timestamp': '2025-09-10 02:32:10.724976', 'step': 6080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.752615', 'step': 6080, 'epoch': 3} {'type': 'loss', 'content': 0.0002218651061411947, 'timestamp': '2025-09-10 02:32:10.754453', 'step': 6081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.785046', 'step': 6081, 'epoch': 3} {'type': 'loss', 'content': 0.0007585693383589387, 'timestamp': '2025-09-10 02:32:10.786867', 'step': 6082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.815534', 'step': 6082, 'epoch': 3} {'type': 'loss', 'content': 0.0015208182157948613, 'timestamp': '2025-09-10 02:32:10.817443', 'step': 6083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.846799', 'step': 6083, 'epoch': 3} {'type': 'loss', 'content': 0.0015198250766843557, 'timestamp': '2025-09-10 02:32:10.870122', 'step': 6084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.899263', 'step': 6084, 'epoch': 3} {'type': 'loss', 'content': 0.0004692175716627389, 'timestamp': '2025-09-10 02:32:10.901106', 'step': 6085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.929635', 'step': 6085, 'epoch': 3} {'type': 'loss', 'content': 0.0025713664945214987, 'timestamp': '2025-09-10 02:32:10.931133', 'step': 6086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.960013', 'step': 6086, 'epoch': 3} {'type': 'loss', 'content': 0.001673850929364562, 'timestamp': '2025-09-10 02:32:10.961753', 'step': 6087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:10.990391', 'step': 6087, 'epoch': 3} {'type': 'loss', 'content': 0.0027655635494738817, 'timestamp': '2025-09-10 02:32:11.013827', 'step': 6088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.043082', 'step': 6088, 'epoch': 3} {'type': 'loss', 'content': 0.00013074224989395589, 'timestamp': '2025-09-10 02:32:11.045104', 'step': 6089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.074067', 'step': 6089, 'epoch': 3} {'type': 'loss', 'content': 0.026914628222584724, 'timestamp': '2025-09-10 02:32:11.075879', 'step': 6090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:11.104850', 'step': 6090, 'epoch': 3} {'type': 'loss', 'content': 0.0005443138652481139, 'timestamp': '2025-09-10 02:32:11.106356', 'step': 6091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.135142', 'step': 6091, 'epoch': 3} {'type': 'loss', 'content': 0.00029482325771823525, 'timestamp': '2025-09-10 02:32:11.158546', 'step': 6092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.188627', 'step': 6092, 'epoch': 3} {'type': 'loss', 'content': 0.0010598287917673588, 'timestamp': '2025-09-10 02:32:11.190415', 'step': 6093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.221662', 'step': 6093, 'epoch': 3} {'type': 'loss', 'content': 0.015348801389336586, 'timestamp': '2025-09-10 02:32:11.223412', 'step': 6094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.256660', 'step': 6094, 'epoch': 3} {'type': 'loss', 'content': 0.00018287813873030245, 'timestamp': '2025-09-10 02:32:11.258924', 'step': 6095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:11.287545', 'step': 6095, 'epoch': 3} {'type': 'loss', 'content': 0.0003591575659811497, 'timestamp': '2025-09-10 02:32:11.310904', 'step': 6096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.342593', 'step': 6096, 'epoch': 3} {'type': 'loss', 'content': 0.0528402216732502, 'timestamp': '2025-09-10 02:32:11.344587', 'step': 6097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.375959', 'step': 6097, 'epoch': 3} {'type': 'loss', 'content': 0.006955190096050501, 'timestamp': '2025-09-10 02:32:11.377858', 'step': 6098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.411720', 'step': 6098, 'epoch': 3} {'type': 'loss', 'content': 0.0035176179371774197, 'timestamp': '2025-09-10 02:32:11.413503', 'step': 6099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.445858', 'step': 6099, 'epoch': 3} {'type': 'loss', 'content': 0.015753187239170074, 'timestamp': '2025-09-10 02:32:11.469280', 'step': 6100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.505086', 'step': 6100, 'epoch': 3} {'type': 'loss', 'content': 0.0005709449178539217, 'timestamp': '2025-09-10 02:32:11.507174', 'step': 6101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.538614', 'step': 6101, 'epoch': 3} {'type': 'loss', 'content': 0.0005121551221236587, 'timestamp': '2025-09-10 02:32:11.540708', 'step': 6102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.575806', 'step': 6102, 'epoch': 3} {'type': 'loss', 'content': 0.003526447806507349, 'timestamp': '2025-09-10 02:32:11.577696', 'step': 6103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.609683', 'step': 6103, 'epoch': 3} {'type': 'loss', 'content': 0.00563672324642539, 'timestamp': '2025-09-10 02:32:11.633388', 'step': 6104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:11.664181', 'step': 6104, 'epoch': 3} {'type': 'loss', 'content': 0.034490954130887985, 'timestamp': '2025-09-10 02:32:11.665943', 'step': 6105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.703553', 'step': 6105, 'epoch': 3} {'type': 'loss', 'content': 0.0005361035582609475, 'timestamp': '2025-09-10 02:32:11.705535', 'step': 6106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.743083', 'step': 6106, 'epoch': 3} {'type': 'loss', 'content': 0.00022824991901870817, 'timestamp': '2025-09-10 02:32:11.744860', 'step': 6107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.778747', 'step': 6107, 'epoch': 3} {'type': 'loss', 'content': 0.00033912801882252097, 'timestamp': '2025-09-10 02:32:11.802065', 'step': 6108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:11.831090', 'step': 6108, 'epoch': 3} {'type': 'loss', 'content': 0.0004516389162745327, 'timestamp': '2025-09-10 02:32:11.832796', 'step': 6109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.861531', 'step': 6109, 'epoch': 3} {'type': 'loss', 'content': 0.0016965868417173624, 'timestamp': '2025-09-10 02:32:11.863190', 'step': 6110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:11.892382', 'step': 6110, 'epoch': 3} {'type': 'loss', 'content': 0.0002465526922605932, 'timestamp': '2025-09-10 02:32:11.893978', 'step': 6111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.923297', 'step': 6111, 'epoch': 3} {'type': 'loss', 'content': 0.0005807014531455934, 'timestamp': '2025-09-10 02:32:11.946521', 'step': 6112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:11.975691', 'step': 6112, 'epoch': 3} {'type': 'loss', 'content': 0.000393257214454934, 'timestamp': '2025-09-10 02:32:11.977596', 'step': 6113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.006656', 'step': 6113, 'epoch': 3} {'type': 'loss', 'content': 0.039987385272979736, 'timestamp': '2025-09-10 02:32:12.008545', 'step': 6114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.037688', 'step': 6114, 'epoch': 3} {'type': 'loss', 'content': 0.0008695316500961781, 'timestamp': '2025-09-10 02:32:12.039677', 'step': 6115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.068398', 'step': 6115, 'epoch': 3} {'type': 'loss', 'content': 0.010479303076863289, 'timestamp': '2025-09-10 02:32:12.091893', 'step': 6116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.121026', 'step': 6116, 'epoch': 3} {'type': 'loss', 'content': 0.0006460921722464263, 'timestamp': '2025-09-10 02:32:12.123141', 'step': 6117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.152401', 'step': 6117, 'epoch': 3} {'type': 'loss', 'content': 0.01761404611170292, 'timestamp': '2025-09-10 02:32:12.154498', 'step': 6118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:12.183639', 'step': 6118, 'epoch': 3} {'type': 'loss', 'content': 0.0028172603342682123, 'timestamp': '2025-09-10 02:32:12.185574', 'step': 6119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:12.214910', 'step': 6119, 'epoch': 3} {'type': 'loss', 'content': 0.0009161824127659202, 'timestamp': '2025-09-10 02:32:12.238248', 'step': 6120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:12.267956', 'step': 6120, 'epoch': 3} {'type': 'loss', 'content': 0.000278738618362695, 'timestamp': '2025-09-10 02:32:12.270159', 'step': 6121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.299252', 'step': 6121, 'epoch': 3} {'type': 'loss', 'content': 0.0011909457389265299, 'timestamp': '2025-09-10 02:32:12.301256', 'step': 6122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.330692', 'step': 6122, 'epoch': 3} {'type': 'loss', 'content': 0.0005585025646723807, 'timestamp': '2025-09-10 02:32:12.332580', 'step': 6123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.361637', 'step': 6123, 'epoch': 3} {'type': 'loss', 'content': 0.0004416074953041971, 'timestamp': '2025-09-10 02:32:12.384629', 'step': 6124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.413611', 'step': 6124, 'epoch': 3} {'type': 'loss', 'content': 0.03923536464571953, 'timestamp': '2025-09-10 02:32:12.415210', 'step': 6125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.444017', 'step': 6125, 'epoch': 3} {'type': 'loss', 'content': 0.0012181001948192716, 'timestamp': '2025-09-10 02:32:12.445942', 'step': 6126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.475024', 'step': 6126, 'epoch': 3} {'type': 'loss', 'content': 0.0029334353748708963, 'timestamp': '2025-09-10 02:32:12.476755', 'step': 6127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.505343', 'step': 6127, 'epoch': 3} {'type': 'loss', 'content': 0.009986157529056072, 'timestamp': '2025-09-10 02:32:12.528831', 'step': 6128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.557868', 'step': 6128, 'epoch': 3} {'type': 'loss', 'content': 0.021301427856087685, 'timestamp': '2025-09-10 02:32:12.559807', 'step': 6129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.588302', 'step': 6129, 'epoch': 3} {'type': 'loss', 'content': 0.0011228956282138824, 'timestamp': '2025-09-10 02:32:12.590059', 'step': 6130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.618629', 'step': 6130, 'epoch': 3} {'type': 'loss', 'content': 0.000370693946024403, 'timestamp': '2025-09-10 02:32:12.620467', 'step': 6131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.649412', 'step': 6131, 'epoch': 3} {'type': 'loss', 'content': 0.0028962441720068455, 'timestamp': '2025-09-10 02:32:12.672855', 'step': 6132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.701800', 'step': 6132, 'epoch': 3} {'type': 'loss', 'content': 0.00017247784126084298, 'timestamp': '2025-09-10 02:32:12.703995', 'step': 6133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:12.732710', 'step': 6133, 'epoch': 3} {'type': 'loss', 'content': 0.0002645184868015349, 'timestamp': '2025-09-10 02:32:12.735901', 'step': 6134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.767336', 'step': 6134, 'epoch': 3} {'type': 'loss', 'content': 0.0008119445410557091, 'timestamp': '2025-09-10 02:32:12.769082', 'step': 6135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.802533', 'step': 6135, 'epoch': 3} {'type': 'loss', 'content': 0.0010705316672101617, 'timestamp': '2025-09-10 02:32:12.825864', 'step': 6136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.854767', 'step': 6136, 'epoch': 3} {'type': 'loss', 'content': 0.007430485915392637, 'timestamp': '2025-09-10 02:32:12.856426', 'step': 6137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.885400', 'step': 6137, 'epoch': 3} {'type': 'loss', 'content': 0.017648329958319664, 'timestamp': '2025-09-10 02:32:12.887352', 'step': 6138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:12.916699', 'step': 6138, 'epoch': 3} {'type': 'loss', 'content': 0.0019228752935305238, 'timestamp': '2025-09-10 02:32:12.918559', 'step': 6139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.947597', 'step': 6139, 'epoch': 3} {'type': 'loss', 'content': 0.0014111108612269163, 'timestamp': '2025-09-10 02:32:12.971073', 'step': 6140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:12.999730', 'step': 6140, 'epoch': 3} {'type': 'loss', 'content': 0.0016826939536258578, 'timestamp': '2025-09-10 02:32:13.002179', 'step': 6141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.032704', 'step': 6141, 'epoch': 3} {'type': 'loss', 'content': 0.0007334706024266779, 'timestamp': '2025-09-10 02:32:13.034532', 'step': 6142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.063618', 'step': 6142, 'epoch': 3} {'type': 'loss', 'content': 0.00017897525685839355, 'timestamp': '2025-09-10 02:32:13.065570', 'step': 6143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.094353', 'step': 6143, 'epoch': 3} {'type': 'loss', 'content': 0.0010283568408340216, 'timestamp': '2025-09-10 02:32:13.117707', 'step': 6144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.147305', 'step': 6144, 'epoch': 3} {'type': 'loss', 'content': 0.0006188590778037906, 'timestamp': '2025-09-10 02:32:13.149186', 'step': 6145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.180082', 'step': 6145, 'epoch': 3} {'type': 'loss', 'content': 0.0004881804343312979, 'timestamp': '2025-09-10 02:32:13.181820', 'step': 6146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:13.214603', 'step': 6146, 'epoch': 3} {'type': 'loss', 'content': 0.000541047949809581, 'timestamp': '2025-09-10 02:32:13.216744', 'step': 6147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.251832', 'step': 6147, 'epoch': 3} {'type': 'loss', 'content': 0.005712251644581556, 'timestamp': '2025-09-10 02:32:13.275138', 'step': 6148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.304284', 'step': 6148, 'epoch': 3} {'type': 'loss', 'content': 0.001517638680525124, 'timestamp': '2025-09-10 02:32:13.306203', 'step': 6149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.336477', 'step': 6149, 'epoch': 3} {'type': 'loss', 'content': 0.0017551190685480833, 'timestamp': '2025-09-10 02:32:13.338432', 'step': 6150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.368942', 'step': 6150, 'epoch': 3} {'type': 'loss', 'content': 0.0005528161418624222, 'timestamp': '2025-09-10 02:32:13.370911', 'step': 6151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.404658', 'step': 6151, 'epoch': 3} {'type': 'loss', 'content': 0.000431155989645049, 'timestamp': '2025-09-10 02:32:13.428213', 'step': 6152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:13.457744', 'step': 6152, 'epoch': 3} {'type': 'loss', 'content': 0.010887889191508293, 'timestamp': '2025-09-10 02:32:13.459780', 'step': 6153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.496303', 'step': 6153, 'epoch': 3} {'type': 'loss', 'content': 0.004278097301721573, 'timestamp': '2025-09-10 02:32:13.498321', 'step': 6154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:13.531496', 'step': 6154, 'epoch': 3} {'type': 'loss', 'content': 0.010864503681659698, 'timestamp': '2025-09-10 02:32:13.533306', 'step': 6155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:13.566594', 'step': 6155, 'epoch': 3} {'type': 'loss', 'content': 0.004223777912557125, 'timestamp': '2025-09-10 02:32:13.590021', 'step': 6156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.621893', 'step': 6156, 'epoch': 3} {'type': 'loss', 'content': 0.00029171284404583275, 'timestamp': '2025-09-10 02:32:13.623788', 'step': 6157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.657131', 'step': 6157, 'epoch': 3} {'type': 'loss', 'content': 0.0011619922006502748, 'timestamp': '2025-09-10 02:32:13.659086', 'step': 6158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.698307', 'step': 6158, 'epoch': 3} {'type': 'loss', 'content': 0.0005122654838487506, 'timestamp': '2025-09-10 02:32:13.700086', 'step': 6159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.738298', 'step': 6159, 'epoch': 3} {'type': 'loss', 'content': 0.00033908014302141964, 'timestamp': '2025-09-10 02:32:13.762018', 'step': 6160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.798997', 'step': 6160, 'epoch': 3} {'type': 'loss', 'content': 0.02243952453136444, 'timestamp': '2025-09-10 02:32:13.801122', 'step': 6161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:13.830500', 'step': 6161, 'epoch': 3} {'type': 'loss', 'content': 0.007921956479549408, 'timestamp': '2025-09-10 02:32:13.833682', 'step': 6162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:13.865096', 'step': 6162, 'epoch': 3} {'type': 'loss', 'content': 0.0003726438444573432, 'timestamp': '2025-09-10 02:32:13.867165', 'step': 6163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.895969', 'step': 6163, 'epoch': 3} {'type': 'loss', 'content': 0.00021600746549665928, 'timestamp': '2025-09-10 02:32:13.919516', 'step': 6164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:13.949089', 'step': 6164, 'epoch': 3} {'type': 'loss', 'content': 0.00013112745364196599, 'timestamp': '2025-09-10 02:32:13.951125', 'step': 6165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:13.980350', 'step': 6165, 'epoch': 3} {'type': 'loss', 'content': 0.00412676902487874, 'timestamp': '2025-09-10 02:32:13.982241', 'step': 6166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.011304', 'step': 6166, 'epoch': 3} {'type': 'loss', 'content': 0.004285149276256561, 'timestamp': '2025-09-10 02:32:14.013358', 'step': 6167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:14.042800', 'step': 6167, 'epoch': 3} {'type': 'loss', 'content': 0.0004469568666536361, 'timestamp': '2025-09-10 02:32:14.066299', 'step': 6168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.096145', 'step': 6168, 'epoch': 3} {'type': 'loss', 'content': 0.0006515326676890254, 'timestamp': '2025-09-10 02:32:14.098098', 'step': 6169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.127159', 'step': 6169, 'epoch': 3} {'type': 'loss', 'content': 0.0013496106257662177, 'timestamp': '2025-09-10 02:32:14.128814', 'step': 6170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.158335', 'step': 6170, 'epoch': 3} {'type': 'loss', 'content': 0.0009292674367316067, 'timestamp': '2025-09-10 02:32:14.160227', 'step': 6171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.189352', 'step': 6171, 'epoch': 3} {'type': 'loss', 'content': 0.0011377623304724693, 'timestamp': '2025-09-10 02:32:14.212897', 'step': 6172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.242531', 'step': 6172, 'epoch': 3} {'type': 'loss', 'content': 0.01203919854015112, 'timestamp': '2025-09-10 02:32:14.244487', 'step': 6173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.273885', 'step': 6173, 'epoch': 3} {'type': 'loss', 'content': 0.00012295367196202278, 'timestamp': '2025-09-10 02:32:14.275587', 'step': 6174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.304489', 'step': 6174, 'epoch': 3} {'type': 'loss', 'content': 0.00014454529446084052, 'timestamp': '2025-09-10 02:32:14.306216', 'step': 6175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:14.335291', 'step': 6175, 'epoch': 3} {'type': 'loss', 'content': 0.00014045732677914202, 'timestamp': '2025-09-10 02:32:14.358860', 'step': 6176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.388401', 'step': 6176, 'epoch': 3} {'type': 'loss', 'content': 0.00039911657222546637, 'timestamp': '2025-09-10 02:32:14.390230', 'step': 6177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.420161', 'step': 6177, 'epoch': 3} {'type': 'loss', 'content': 0.0013413883280009031, 'timestamp': '2025-09-10 02:32:14.422011', 'step': 6178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.451483', 'step': 6178, 'epoch': 3} {'type': 'loss', 'content': 0.00018301047384738922, 'timestamp': '2025-09-10 02:32:14.453476', 'step': 6179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.482436', 'step': 6179, 'epoch': 3} {'type': 'loss', 'content': 0.0018863353179767728, 'timestamp': '2025-09-10 02:32:14.505809', 'step': 6180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.535443', 'step': 6180, 'epoch': 3} {'type': 'loss', 'content': 0.00019373593386262655, 'timestamp': '2025-09-10 02:32:14.537403', 'step': 6181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.566887', 'step': 6181, 'epoch': 3} {'type': 'loss', 'content': 0.03961193934082985, 'timestamp': '2025-09-10 02:32:14.568823', 'step': 6182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.598044', 'step': 6182, 'epoch': 3} {'type': 'loss', 'content': 0.005647978745400906, 'timestamp': '2025-09-10 02:32:14.599915', 'step': 6183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:14.628906', 'step': 6183, 'epoch': 3} {'type': 'loss', 'content': 0.00017677816504146904, 'timestamp': '2025-09-10 02:32:14.652400', 'step': 6184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.681444', 'step': 6184, 'epoch': 3} {'type': 'loss', 'content': 0.0004426330851856619, 'timestamp': '2025-09-10 02:32:14.683263', 'step': 6185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.712685', 'step': 6185, 'epoch': 3} {'type': 'loss', 'content': 0.00012685095134656876, 'timestamp': '2025-09-10 02:32:14.714665', 'step': 6186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.744367', 'step': 6186, 'epoch': 3} {'type': 'loss', 'content': 0.00031306553864851594, 'timestamp': '2025-09-10 02:32:14.746523', 'step': 6187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.778144', 'step': 6187, 'epoch': 3} {'type': 'loss', 'content': 0.0031656906940042973, 'timestamp': '2025-09-10 02:32:14.801685', 'step': 6188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.831314', 'step': 6188, 'epoch': 3} {'type': 'loss', 'content': 0.00013748157653026283, 'timestamp': '2025-09-10 02:32:14.833424', 'step': 6189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.863116', 'step': 6189, 'epoch': 3} {'type': 'loss', 'content': 8.928526222007349e-05, 'timestamp': '2025-09-10 02:32:14.865332', 'step': 6190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:14.895082', 'step': 6190, 'epoch': 3} {'type': 'loss', 'content': 9.361335105495527e-05, 'timestamp': '2025-09-10 02:32:14.897188', 'step': 6191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.926467', 'step': 6191, 'epoch': 3} {'type': 'loss', 'content': 0.00012292077008169144, 'timestamp': '2025-09-10 02:32:14.949934', 'step': 6192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:14.979431', 'step': 6192, 'epoch': 3} {'type': 'loss', 'content': 0.01482431497424841, 'timestamp': '2025-09-10 02:32:14.981328', 'step': 6193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.010231', 'step': 6193, 'epoch': 3} {'type': 'loss', 'content': 0.006716754753142595, 'timestamp': '2025-09-10 02:32:15.012208', 'step': 6194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.041545', 'step': 6194, 'epoch': 3} {'type': 'loss', 'content': 0.03301231190562248, 'timestamp': '2025-09-10 02:32:15.043380', 'step': 6195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.072426', 'step': 6195, 'epoch': 3} {'type': 'loss', 'content': 0.008143181912600994, 'timestamp': '2025-09-10 02:32:15.095873', 'step': 6196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.125315', 'step': 6196, 'epoch': 3} {'type': 'loss', 'content': 0.00032031707814894617, 'timestamp': '2025-09-10 02:32:15.127316', 'step': 6197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.157348', 'step': 6197, 'epoch': 3} {'type': 'loss', 'content': 0.0015913124661892653, 'timestamp': '2025-09-10 02:32:15.159360', 'step': 6198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.190009', 'step': 6198, 'epoch': 3} {'type': 'loss', 'content': 9.639223571866751e-05, 'timestamp': '2025-09-10 02:32:15.192145', 'step': 6199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:15.224192', 'step': 6199, 'epoch': 3} {'type': 'loss', 'content': 7.586956053273752e-05, 'timestamp': '2025-09-10 02:32:15.247859', 'step': 6200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.278900', 'step': 6200, 'epoch': 3} {'type': 'loss', 'content': 0.00031549923005513847, 'timestamp': '2025-09-10 02:32:15.280748', 'step': 6201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.310097', 'step': 6201, 'epoch': 3} {'type': 'loss', 'content': 0.0003611970169004053, 'timestamp': '2025-09-10 02:32:15.311838', 'step': 6202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.343670', 'step': 6202, 'epoch': 3} {'type': 'loss', 'content': 0.012886380776762962, 'timestamp': '2025-09-10 02:32:15.345565', 'step': 6203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.376797', 'step': 6203, 'epoch': 3} {'type': 'loss', 'content': 7.04435515217483e-05, 'timestamp': '2025-09-10 02:32:15.400396', 'step': 6204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.436695', 'step': 6204, 'epoch': 3} {'type': 'loss', 'content': 0.00022571615409106016, 'timestamp': '2025-09-10 02:32:15.438755', 'step': 6205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:15.472458', 'step': 6205, 'epoch': 3} {'type': 'loss', 'content': 7.774594268994406e-05, 'timestamp': '2025-09-10 02:32:15.474265', 'step': 6206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.509034', 'step': 6206, 'epoch': 3} {'type': 'loss', 'content': 5.046996375313029e-05, 'timestamp': '2025-09-10 02:32:15.510900', 'step': 6207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.540835', 'step': 6207, 'epoch': 3} {'type': 'loss', 'content': 0.00037210204754956067, 'timestamp': '2025-09-10 02:32:15.564471', 'step': 6208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.600615', 'step': 6208, 'epoch': 3} {'type': 'loss', 'content': 0.0005679802852682769, 'timestamp': '2025-09-10 02:32:15.602653', 'step': 6209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:15.636276', 'step': 6209, 'epoch': 3} {'type': 'loss', 'content': 0.0002304376830579713, 'timestamp': '2025-09-10 02:32:15.638117', 'step': 6210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.671371', 'step': 6210, 'epoch': 3} {'type': 'loss', 'content': 0.004458740819245577, 'timestamp': '2025-09-10 02:32:15.673673', 'step': 6211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:15.711415', 'step': 6211, 'epoch': 3} {'type': 'loss', 'content': 8.715118747204542e-05, 'timestamp': '2025-09-10 02:32:15.734981', 'step': 6212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.771670', 'step': 6212, 'epoch': 3} {'type': 'loss', 'content': 0.00011552788782864809, 'timestamp': '2025-09-10 02:32:15.773987', 'step': 6213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.812433', 'step': 6213, 'epoch': 3} {'type': 'loss', 'content': 0.0019429840613156557, 'timestamp': '2025-09-10 02:32:15.814448', 'step': 6214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.844191', 'step': 6214, 'epoch': 3} {'type': 'loss', 'content': 0.002169437939301133, 'timestamp': '2025-09-10 02:32:15.846212', 'step': 6215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:15.875748', 'step': 6215, 'epoch': 3} {'type': 'loss', 'content': 0.017387619242072105, 'timestamp': '2025-09-10 02:32:15.900170', 'step': 6216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.929568', 'step': 6216, 'epoch': 3} {'type': 'loss', 'content': 0.0425243116915226, 'timestamp': '2025-09-10 02:32:15.931524', 'step': 6217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.960536', 'step': 6217, 'epoch': 3} {'type': 'loss', 'content': 0.04513928294181824, 'timestamp': '2025-09-10 02:32:15.962697', 'step': 6218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:15.992056', 'step': 6218, 'epoch': 3} {'type': 'loss', 'content': 0.0001454920566175133, 'timestamp': '2025-09-10 02:32:15.994231', 'step': 6219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.023619', 'step': 6219, 'epoch': 3} {'type': 'loss', 'content': 0.00026676151901483536, 'timestamp': '2025-09-10 02:32:16.047266', 'step': 6220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.077588', 'step': 6220, 'epoch': 3} {'type': 'loss', 'content': 0.01327372808009386, 'timestamp': '2025-09-10 02:32:16.079322', 'step': 6221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:16.108561', 'step': 6221, 'epoch': 3} {'type': 'loss', 'content': 7.856685988372192e-05, 'timestamp': '2025-09-10 02:32:16.110478', 'step': 6222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.140122', 'step': 6222, 'epoch': 3} {'type': 'loss', 'content': 0.0002478799724485725, 'timestamp': '2025-09-10 02:32:16.141926', 'step': 6223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.170628', 'step': 6223, 'epoch': 3} {'type': 'loss', 'content': 0.0002998611016664654, 'timestamp': '2025-09-10 02:32:16.193940', 'step': 6224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:16.223877', 'step': 6224, 'epoch': 3} {'type': 'loss', 'content': 0.00018482895393390208, 'timestamp': '2025-09-10 02:32:16.225816', 'step': 6225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.256063', 'step': 6225, 'epoch': 3} {'type': 'loss', 'content': 9.805053559830412e-05, 'timestamp': '2025-09-10 02:32:16.257926', 'step': 6226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.288360', 'step': 6226, 'epoch': 3} {'type': 'loss', 'content': 0.00033532059751451015, 'timestamp': '2025-09-10 02:32:16.290101', 'step': 6227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.322326', 'step': 6227, 'epoch': 3} {'type': 'loss', 'content': 0.00011088912287959829, 'timestamp': '2025-09-10 02:32:16.345707', 'step': 6228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:16.378762', 'step': 6228, 'epoch': 3} {'type': 'loss', 'content': 0.00028449486126191914, 'timestamp': '2025-09-10 02:32:16.380436', 'step': 6229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.409194', 'step': 6229, 'epoch': 3} {'type': 'loss', 'content': 0.001823748811148107, 'timestamp': '2025-09-10 02:32:16.410931', 'step': 6230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:16.439871', 'step': 6230, 'epoch': 3} {'type': 'loss', 'content': 0.00012009156489511952, 'timestamp': '2025-09-10 02:32:16.441630', 'step': 6231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:16.472398', 'step': 6231, 'epoch': 3} {'type': 'loss', 'content': 0.00025651781470514834, 'timestamp': '2025-09-10 02:32:16.495813', 'step': 6232, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:32:18.431282', 'step': 6232, 'epoch': 3} {'type': 'pplx', 'content': 2606747.3698908505, 'timestamp': '2025-09-10 02:32:18.434868', 'step': 6232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.463633', 'step': 6232, 'epoch': 3} {'type': 'loss', 'content': 0.02282080054283142, 'timestamp': '2025-09-10 02:32:18.465407', 'step': 6233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:18.494958', 'step': 6233, 'epoch': 3} {'type': 'loss', 'content': 0.00016311134095303714, 'timestamp': '2025-09-10 02:32:18.496931', 'step': 6234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.526297', 'step': 6234, 'epoch': 3} {'type': 'loss', 'content': 0.000255206337897107, 'timestamp': '2025-09-10 02:32:18.528192', 'step': 6235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:18.557708', 'step': 6235, 'epoch': 3} {'type': 'loss', 'content': 8.574249659432098e-05, 'timestamp': '2025-09-10 02:32:18.581180', 'step': 6236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.610717', 'step': 6236, 'epoch': 3} {'type': 'loss', 'content': 0.00012947633513249457, 'timestamp': '2025-09-10 02:32:18.612895', 'step': 6237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.642857', 'step': 6237, 'epoch': 3} {'type': 'loss', 'content': 0.04599553719162941, 'timestamp': '2025-09-10 02:32:18.644925', 'step': 6238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:18.674260', 'step': 6238, 'epoch': 3} {'type': 'loss', 'content': 6.865202158223838e-05, 'timestamp': '2025-09-10 02:32:18.676093', 'step': 6239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.705420', 'step': 6239, 'epoch': 3} {'type': 'loss', 'content': 9.582000348018482e-05, 'timestamp': '2025-09-10 02:32:18.728882', 'step': 6240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.758203', 'step': 6240, 'epoch': 3} {'type': 'loss', 'content': 0.0253781545907259, 'timestamp': '2025-09-10 02:32:18.760079', 'step': 6241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.792731', 'step': 6241, 'epoch': 3} {'type': 'loss', 'content': 0.0001324364129686728, 'timestamp': '2025-09-10 02:32:18.794693', 'step': 6242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:18.823912', 'step': 6242, 'epoch': 3} {'type': 'loss', 'content': 0.00013522346853278577, 'timestamp': '2025-09-10 02:32:18.825828', 'step': 6243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.855729', 'step': 6243, 'epoch': 3} {'type': 'loss', 'content': 0.00027700577629730105, 'timestamp': '2025-09-10 02:32:18.879022', 'step': 6244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.908380', 'step': 6244, 'epoch': 3} {'type': 'loss', 'content': 0.0021711511071771383, 'timestamp': '2025-09-10 02:32:18.910263', 'step': 6245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.939005', 'step': 6245, 'epoch': 3} {'type': 'loss', 'content': 0.0015461508883163333, 'timestamp': '2025-09-10 02:32:18.940797', 'step': 6246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:18.970112', 'step': 6246, 'epoch': 3} {'type': 'loss', 'content': 0.00022470230760518461, 'timestamp': '2025-09-10 02:32:18.972104', 'step': 6247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.001210', 'step': 6247, 'epoch': 3} {'type': 'loss', 'content': 0.000954842078499496, 'timestamp': '2025-09-10 02:32:19.024809', 'step': 6248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.054968', 'step': 6248, 'epoch': 3} {'type': 'loss', 'content': 0.00021177942107897252, 'timestamp': '2025-09-10 02:32:19.056788', 'step': 6249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.085634', 'step': 6249, 'epoch': 3} {'type': 'loss', 'content': 0.008807296864688396, 'timestamp': '2025-09-10 02:32:19.087451', 'step': 6250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.116854', 'step': 6250, 'epoch': 3} {'type': 'loss', 'content': 0.00024041572760324925, 'timestamp': '2025-09-10 02:32:19.118606', 'step': 6251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:19.147863', 'step': 6251, 'epoch': 3} {'type': 'loss', 'content': 0.03950396180152893, 'timestamp': '2025-09-10 02:32:19.171237', 'step': 6252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.201996', 'step': 6252, 'epoch': 3} {'type': 'loss', 'content': 0.0005115449312143028, 'timestamp': '2025-09-10 02:32:19.203785', 'step': 6253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.236659', 'step': 6253, 'epoch': 3} {'type': 'loss', 'content': 0.005030186381191015, 'timestamp': '2025-09-10 02:32:19.238428', 'step': 6254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.270585', 'step': 6254, 'epoch': 3} {'type': 'loss', 'content': 0.017237406224012375, 'timestamp': '2025-09-10 02:32:19.272698', 'step': 6255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.302247', 'step': 6255, 'epoch': 3} {'type': 'loss', 'content': 0.0001371714606648311, 'timestamp': '2025-09-10 02:32:19.326265', 'step': 6256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.357618', 'step': 6256, 'epoch': 3} {'type': 'loss', 'content': 0.0002689917164389044, 'timestamp': '2025-09-10 02:32:19.359973', 'step': 6257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:19.390962', 'step': 6257, 'epoch': 3} {'type': 'loss', 'content': 0.00031374432728625834, 'timestamp': '2025-09-10 02:32:19.393036', 'step': 6258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.428450', 'step': 6258, 'epoch': 3} {'type': 'loss', 'content': 0.0002451702021062374, 'timestamp': '2025-09-10 02:32:19.430259', 'step': 6259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.461143', 'step': 6259, 'epoch': 3} {'type': 'loss', 'content': 0.00019412672554608434, 'timestamp': '2025-09-10 02:32:19.484750', 'step': 6260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.517857', 'step': 6260, 'epoch': 3} {'type': 'loss', 'content': 0.00016561677330173552, 'timestamp': '2025-09-10 02:32:19.519927', 'step': 6261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.553055', 'step': 6261, 'epoch': 3} {'type': 'loss', 'content': 8.579413406550884e-05, 'timestamp': '2025-09-10 02:32:19.555146', 'step': 6262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.591614', 'step': 6262, 'epoch': 3} {'type': 'loss', 'content': 0.00016297269030474126, 'timestamp': '2025-09-10 02:32:19.593356', 'step': 6263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.625978', 'step': 6263, 'epoch': 3} {'type': 'loss', 'content': 0.0018586188089102507, 'timestamp': '2025-09-10 02:32:19.649378', 'step': 6264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.687636', 'step': 6264, 'epoch': 3} {'type': 'loss', 'content': 0.00047484468086622655, 'timestamp': '2025-09-10 02:32:19.689473', 'step': 6265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.727203', 'step': 6265, 'epoch': 3} {'type': 'loss', 'content': 0.00035110226599499583, 'timestamp': '2025-09-10 02:32:19.729145', 'step': 6266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.764740', 'step': 6266, 'epoch': 3} {'type': 'loss', 'content': 0.00018414246733300388, 'timestamp': '2025-09-10 02:32:19.766376', 'step': 6267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:19.805016', 'step': 6267, 'epoch': 3} {'type': 'loss', 'content': 0.0004543558170553297, 'timestamp': '2025-09-10 02:32:19.828573', 'step': 6268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.858293', 'step': 6268, 'epoch': 3} {'type': 'loss', 'content': 0.00028682383708655834, 'timestamp': '2025-09-10 02:32:19.860282', 'step': 6269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.889809', 'step': 6269, 'epoch': 3} {'type': 'loss', 'content': 0.0007902501965872943, 'timestamp': '2025-09-10 02:32:19.892018', 'step': 6270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.922637', 'step': 6270, 'epoch': 3} {'type': 'loss', 'content': 0.001329060411080718, 'timestamp': '2025-09-10 02:32:19.924473', 'step': 6271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:19.954460', 'step': 6271, 'epoch': 3} {'type': 'loss', 'content': 9.7282012575306e-05, 'timestamp': '2025-09-10 02:32:19.977960', 'step': 6272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.006771', 'step': 6272, 'epoch': 3} {'type': 'loss', 'content': 0.00014796505274716765, 'timestamp': '2025-09-10 02:32:20.008334', 'step': 6273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.037605', 'step': 6273, 'epoch': 3} {'type': 'loss', 'content': 0.0030559967271983624, 'timestamp': '2025-09-10 02:32:20.039555', 'step': 6274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.068523', 'step': 6274, 'epoch': 3} {'type': 'loss', 'content': 0.002250237390398979, 'timestamp': '2025-09-10 02:32:20.070454', 'step': 6275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.099497', 'step': 6275, 'epoch': 3} {'type': 'loss', 'content': 0.000167002814123407, 'timestamp': '2025-09-10 02:32:20.122883', 'step': 6276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.151636', 'step': 6276, 'epoch': 3} {'type': 'loss', 'content': 9.803228022065014e-05, 'timestamp': '2025-09-10 02:32:20.153468', 'step': 6277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.182403', 'step': 6277, 'epoch': 3} {'type': 'loss', 'content': 0.0011862096143886447, 'timestamp': '2025-09-10 02:32:20.184469', 'step': 6278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.213282', 'step': 6278, 'epoch': 3} {'type': 'loss', 'content': 0.00016611372120678425, 'timestamp': '2025-09-10 02:32:20.215310', 'step': 6279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.244471', 'step': 6279, 'epoch': 3} {'type': 'loss', 'content': 0.0001127138311858289, 'timestamp': '2025-09-10 02:32:20.267716', 'step': 6280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.297063', 'step': 6280, 'epoch': 3} {'type': 'loss', 'content': 0.004821238573640585, 'timestamp': '2025-09-10 02:32:20.298897', 'step': 6281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.328250', 'step': 6281, 'epoch': 3} {'type': 'loss', 'content': 0.002264035167172551, 'timestamp': '2025-09-10 02:32:20.330132', 'step': 6282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.359405', 'step': 6282, 'epoch': 3} {'type': 'loss', 'content': 0.00019882863853126764, 'timestamp': '2025-09-10 02:32:20.361453', 'step': 6283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.391238', 'step': 6283, 'epoch': 3} {'type': 'loss', 'content': 0.0003595865855459124, 'timestamp': '2025-09-10 02:32:20.414781', 'step': 6284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:20.444124', 'step': 6284, 'epoch': 3} {'type': 'loss', 'content': 0.00025777207338251173, 'timestamp': '2025-09-10 02:32:20.446232', 'step': 6285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.475692', 'step': 6285, 'epoch': 3} {'type': 'loss', 'content': 0.011265160515904427, 'timestamp': '2025-09-10 02:32:20.477574', 'step': 6286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.506551', 'step': 6286, 'epoch': 3} {'type': 'loss', 'content': 0.0018868193728849292, 'timestamp': '2025-09-10 02:32:20.508366', 'step': 6287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.537179', 'step': 6287, 'epoch': 3} {'type': 'loss', 'content': 0.018199991434812546, 'timestamp': '2025-09-10 02:32:20.560369', 'step': 6288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.590258', 'step': 6288, 'epoch': 3} {'type': 'loss', 'content': 0.0010011766571551561, 'timestamp': '2025-09-10 02:32:20.592057', 'step': 6289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.621450', 'step': 6289, 'epoch': 3} {'type': 'loss', 'content': 0.0005134205566719174, 'timestamp': '2025-09-10 02:32:20.623595', 'step': 6290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.652552', 'step': 6290, 'epoch': 3} {'type': 'loss', 'content': 0.0004429513937793672, 'timestamp': '2025-09-10 02:32:20.654507', 'step': 6291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.683722', 'step': 6291, 'epoch': 3} {'type': 'loss', 'content': 0.00045281826169230044, 'timestamp': '2025-09-10 02:32:20.707280', 'step': 6292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.736434', 'step': 6292, 'epoch': 3} {'type': 'loss', 'content': 0.00084385258378461, 'timestamp': '2025-09-10 02:32:20.738389', 'step': 6293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.771882', 'step': 6293, 'epoch': 3} {'type': 'loss', 'content': 0.0003129442047793418, 'timestamp': '2025-09-10 02:32:20.773844', 'step': 6294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:20.808669', 'step': 6294, 'epoch': 3} {'type': 'loss', 'content': 0.0003997218154836446, 'timestamp': '2025-09-10 02:32:20.810523', 'step': 6295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.839583', 'step': 6295, 'epoch': 3} {'type': 'loss', 'content': 9.220842184731737e-05, 'timestamp': '2025-09-10 02:32:20.863075', 'step': 6296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.892053', 'step': 6296, 'epoch': 3} {'type': 'loss', 'content': 0.013149378821253777, 'timestamp': '2025-09-10 02:32:20.893771', 'step': 6297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.922321', 'step': 6297, 'epoch': 3} {'type': 'loss', 'content': 0.017743749544024467, 'timestamp': '2025-09-10 02:32:20.924058', 'step': 6298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.952506', 'step': 6298, 'epoch': 3} {'type': 'loss', 'content': 0.002593550132587552, 'timestamp': '2025-09-10 02:32:20.954584', 'step': 6299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:20.983919', 'step': 6299, 'epoch': 3} {'type': 'loss', 'content': 0.0023273208644241095, 'timestamp': '2025-09-10 02:32:21.007316', 'step': 6300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.036427', 'step': 6300, 'epoch': 3} {'type': 'loss', 'content': 0.00046904286136850715, 'timestamp': '2025-09-10 02:32:21.038125', 'step': 6301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.067227', 'step': 6301, 'epoch': 3} {'type': 'loss', 'content': 0.004067537374794483, 'timestamp': '2025-09-10 02:32:21.069008', 'step': 6302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.097943', 'step': 6302, 'epoch': 3} {'type': 'loss', 'content': 0.00010718592966441065, 'timestamp': '2025-09-10 02:32:21.101084', 'step': 6303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.132627', 'step': 6303, 'epoch': 3} {'type': 'loss', 'content': 0.00035416753962635994, 'timestamp': '2025-09-10 02:32:21.156323', 'step': 6304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.187542', 'step': 6304, 'epoch': 3} {'type': 'loss', 'content': 0.0010335204424336553, 'timestamp': '2025-09-10 02:32:21.189449', 'step': 6305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.220159', 'step': 6305, 'epoch': 3} {'type': 'loss', 'content': 0.00010592629405437037, 'timestamp': '2025-09-10 02:32:21.221922', 'step': 6306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.255421', 'step': 6306, 'epoch': 3} {'type': 'loss', 'content': 0.0005984573508612812, 'timestamp': '2025-09-10 02:32:21.257272', 'step': 6307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.286192', 'step': 6307, 'epoch': 3} {'type': 'loss', 'content': 0.0011838421924039721, 'timestamp': '2025-09-10 02:32:21.309628', 'step': 6308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.340932', 'step': 6308, 'epoch': 3} {'type': 'loss', 'content': 0.00021382412523962557, 'timestamp': '2025-09-10 02:32:21.342774', 'step': 6309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.372961', 'step': 6309, 'epoch': 3} {'type': 'loss', 'content': 4.185305442661047e-05, 'timestamp': '2025-09-10 02:32:21.374703', 'step': 6310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.411039', 'step': 6310, 'epoch': 3} {'type': 'loss', 'content': 0.0213544312864542, 'timestamp': '2025-09-10 02:32:21.412835', 'step': 6311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.444424', 'step': 6311, 'epoch': 3} {'type': 'loss', 'content': 0.002948646666482091, 'timestamp': '2025-09-10 02:32:21.467921', 'step': 6312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.504900', 'step': 6312, 'epoch': 3} {'type': 'loss', 'content': 0.0010231381747871637, 'timestamp': '2025-09-10 02:32:21.506604', 'step': 6313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.538686', 'step': 6313, 'epoch': 3} {'type': 'loss', 'content': 0.029510745778679848, 'timestamp': '2025-09-10 02:32:21.540548', 'step': 6314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.575090', 'step': 6314, 'epoch': 3} {'type': 'loss', 'content': 0.00034681681427173316, 'timestamp': '2025-09-10 02:32:21.576980', 'step': 6315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.611104', 'step': 6315, 'epoch': 3} {'type': 'loss', 'content': 0.0002904444409068674, 'timestamp': '2025-09-10 02:32:21.634571', 'step': 6316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.665868', 'step': 6316, 'epoch': 3} {'type': 'loss', 'content': 0.0010635191574692726, 'timestamp': '2025-09-10 02:32:21.667832', 'step': 6317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:21.707074', 'step': 6317, 'epoch': 3} {'type': 'loss', 'content': 0.0005607870989479125, 'timestamp': '2025-09-10 02:32:21.708769', 'step': 6318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:21.748027', 'step': 6318, 'epoch': 3} {'type': 'loss', 'content': 8.25113311293535e-05, 'timestamp': '2025-09-10 02:32:21.749965', 'step': 6319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.785144', 'step': 6319, 'epoch': 3} {'type': 'loss', 'content': 0.00039944160380400717, 'timestamp': '2025-09-10 02:32:21.808445', 'step': 6320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.837056', 'step': 6320, 'epoch': 3} {'type': 'loss', 'content': 0.05035491660237312, 'timestamp': '2025-09-10 02:32:21.838991', 'step': 6321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.867958', 'step': 6321, 'epoch': 3} {'type': 'loss', 'content': 0.003698610933497548, 'timestamp': '2025-09-10 02:32:21.869704', 'step': 6322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.898316', 'step': 6322, 'epoch': 3} {'type': 'loss', 'content': 0.00028630904853343964, 'timestamp': '2025-09-10 02:32:21.900190', 'step': 6323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.929464', 'step': 6323, 'epoch': 3} {'type': 'loss', 'content': 0.00029259143047966063, 'timestamp': '2025-09-10 02:32:21.952906', 'step': 6324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:21.981991', 'step': 6324, 'epoch': 3} {'type': 'loss', 'content': 0.0008371643489226699, 'timestamp': '2025-09-10 02:32:21.983841', 'step': 6325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.012484', 'step': 6325, 'epoch': 3} {'type': 'loss', 'content': 0.0013441009214147925, 'timestamp': '2025-09-10 02:32:22.014228', 'step': 6326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.042981', 'step': 6326, 'epoch': 3} {'type': 'loss', 'content': 0.00048186283675022423, 'timestamp': '2025-09-10 02:32:22.044753', 'step': 6327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.073713', 'step': 6327, 'epoch': 3} {'type': 'loss', 'content': 0.001302977092564106, 'timestamp': '2025-09-10 02:32:22.097189', 'step': 6328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.126709', 'step': 6328, 'epoch': 3} {'type': 'loss', 'content': 0.0016445436049252748, 'timestamp': '2025-09-10 02:32:22.128656', 'step': 6329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.157366', 'step': 6329, 'epoch': 3} {'type': 'loss', 'content': 0.09863261878490448, 'timestamp': '2025-09-10 02:32:22.159195', 'step': 6330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.188379', 'step': 6330, 'epoch': 3} {'type': 'loss', 'content': 0.000818633649032563, 'timestamp': '2025-09-10 02:32:22.190299', 'step': 6331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.219476', 'step': 6331, 'epoch': 3} {'type': 'loss', 'content': 7.42963093216531e-05, 'timestamp': '2025-09-10 02:32:22.242810', 'step': 6332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:22.271481', 'step': 6332, 'epoch': 3} {'type': 'loss', 'content': 0.03502601757645607, 'timestamp': '2025-09-10 02:32:22.273453', 'step': 6333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.302613', 'step': 6333, 'epoch': 3} {'type': 'loss', 'content': 0.0007293486269190907, 'timestamp': '2025-09-10 02:32:22.304248', 'step': 6334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.332801', 'step': 6334, 'epoch': 3} {'type': 'loss', 'content': 0.001932331477291882, 'timestamp': '2025-09-10 02:32:22.334557', 'step': 6335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.363441', 'step': 6335, 'epoch': 3} {'type': 'loss', 'content': 0.00112222321331501, 'timestamp': '2025-09-10 02:32:22.386748', 'step': 6336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.415178', 'step': 6336, 'epoch': 3} {'type': 'loss', 'content': 0.014759731478989124, 'timestamp': '2025-09-10 02:32:22.417050', 'step': 6337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:22.445997', 'step': 6337, 'epoch': 3} {'type': 'loss', 'content': 0.0559990331530571, 'timestamp': '2025-09-10 02:32:22.447934', 'step': 6338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.476673', 'step': 6338, 'epoch': 3} {'type': 'loss', 'content': 0.0019543636590242386, 'timestamp': '2025-09-10 02:32:22.478321', 'step': 6339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.507602', 'step': 6339, 'epoch': 3} {'type': 'loss', 'content': 0.0009176231105811894, 'timestamp': '2025-09-10 02:32:22.530706', 'step': 6340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.559608', 'step': 6340, 'epoch': 3} {'type': 'loss', 'content': 0.00015823465946596116, 'timestamp': '2025-09-10 02:32:22.561790', 'step': 6341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.590447', 'step': 6341, 'epoch': 3} {'type': 'loss', 'content': 0.0005021968390792608, 'timestamp': '2025-09-10 02:32:22.592155', 'step': 6342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.621321', 'step': 6342, 'epoch': 3} {'type': 'loss', 'content': 0.0017196570988744497, 'timestamp': '2025-09-10 02:32:22.623278', 'step': 6343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.652815', 'step': 6343, 'epoch': 3} {'type': 'loss', 'content': 0.04168880358338356, 'timestamp': '2025-09-10 02:32:22.676044', 'step': 6344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.704993', 'step': 6344, 'epoch': 3} {'type': 'loss', 'content': 0.0009726889547891915, 'timestamp': '2025-09-10 02:32:22.706845', 'step': 6345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.736276', 'step': 6345, 'epoch': 3} {'type': 'loss', 'content': 0.0030025788582861423, 'timestamp': '2025-09-10 02:32:22.738119', 'step': 6346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.768199', 'step': 6346, 'epoch': 3} {'type': 'loss', 'content': 0.001672284910455346, 'timestamp': '2025-09-10 02:32:22.769909', 'step': 6347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.803449', 'step': 6347, 'epoch': 3} {'type': 'loss', 'content': 0.0004967619897797704, 'timestamp': '2025-09-10 02:32:22.826889', 'step': 6348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.855883', 'step': 6348, 'epoch': 3} {'type': 'loss', 'content': 0.0004202341369818896, 'timestamp': '2025-09-10 02:32:22.857547', 'step': 6349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:22.886531', 'step': 6349, 'epoch': 3} {'type': 'loss', 'content': 0.006135419011116028, 'timestamp': '2025-09-10 02:32:22.888270', 'step': 6350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.917201', 'step': 6350, 'epoch': 3} {'type': 'loss', 'content': 0.0023682310711592436, 'timestamp': '2025-09-10 02:32:22.918790', 'step': 6351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.947449', 'step': 6351, 'epoch': 3} {'type': 'loss', 'content': 0.0017628510249778628, 'timestamp': '2025-09-10 02:32:22.970870', 'step': 6352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:22.999829', 'step': 6352, 'epoch': 3} {'type': 'loss', 'content': 0.02108711563050747, 'timestamp': '2025-09-10 02:32:23.001935', 'step': 6353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.030897', 'step': 6353, 'epoch': 3} {'type': 'loss', 'content': 0.004597174469381571, 'timestamp': '2025-09-10 02:32:23.032690', 'step': 6354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.061529', 'step': 6354, 'epoch': 3} {'type': 'loss', 'content': 0.001220061327330768, 'timestamp': '2025-09-10 02:32:23.063472', 'step': 6355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:23.092605', 'step': 6355, 'epoch': 3} {'type': 'loss', 'content': 0.0007231011986732483, 'timestamp': '2025-09-10 02:32:23.115893', 'step': 6356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.144968', 'step': 6356, 'epoch': 3} {'type': 'loss', 'content': 0.004517923109233379, 'timestamp': '2025-09-10 02:32:23.146881', 'step': 6357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.177243', 'step': 6357, 'epoch': 3} {'type': 'loss', 'content': 0.0022846742067486048, 'timestamp': '2025-09-10 02:32:23.179200', 'step': 6358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.208747', 'step': 6358, 'epoch': 3} {'type': 'loss', 'content': 0.0007761307060718536, 'timestamp': '2025-09-10 02:32:23.210728', 'step': 6359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.243331', 'step': 6359, 'epoch': 3} {'type': 'loss', 'content': 0.005061306990683079, 'timestamp': '2025-09-10 02:32:23.266596', 'step': 6360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.296223', 'step': 6360, 'epoch': 3} {'type': 'loss', 'content': 0.00022909880499355495, 'timestamp': '2025-09-10 02:32:23.297906', 'step': 6361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.326536', 'step': 6361, 'epoch': 3} {'type': 'loss', 'content': 0.010702590458095074, 'timestamp': '2025-09-10 02:32:23.328252', 'step': 6362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.359631', 'step': 6362, 'epoch': 3} {'type': 'loss', 'content': 0.002730281325057149, 'timestamp': '2025-09-10 02:32:23.361254', 'step': 6363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.392880', 'step': 6363, 'epoch': 3} {'type': 'loss', 'content': 0.01938006654381752, 'timestamp': '2025-09-10 02:32:23.416238', 'step': 6364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.446984', 'step': 6364, 'epoch': 3} {'type': 'loss', 'content': 0.0030088615603744984, 'timestamp': '2025-09-10 02:32:23.448832', 'step': 6365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.483805', 'step': 6365, 'epoch': 3} {'type': 'loss', 'content': 0.002860612003132701, 'timestamp': '2025-09-10 02:32:23.485764', 'step': 6366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.519135', 'step': 6366, 'epoch': 3} {'type': 'loss', 'content': 0.0010435706935822964, 'timestamp': '2025-09-10 02:32:23.520982', 'step': 6367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.553370', 'step': 6367, 'epoch': 3} {'type': 'loss', 'content': 0.008101065643131733, 'timestamp': '2025-09-10 02:32:23.576939', 'step': 6368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.609474', 'step': 6368, 'epoch': 3} {'type': 'loss', 'content': 0.0003759710234589875, 'timestamp': '2025-09-10 02:32:23.611411', 'step': 6369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:23.646064', 'step': 6369, 'epoch': 3} {'type': 'loss', 'content': 0.0014672511024400592, 'timestamp': '2025-09-10 02:32:23.647880', 'step': 6370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.685207', 'step': 6370, 'epoch': 3} {'type': 'loss', 'content': 0.005713839549571276, 'timestamp': '2025-09-10 02:32:23.687210', 'step': 6371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.726817', 'step': 6371, 'epoch': 3} {'type': 'loss', 'content': 0.0010342712048441172, 'timestamp': '2025-09-10 02:32:23.750593', 'step': 6372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.785372', 'step': 6372, 'epoch': 3} {'type': 'loss', 'content': 0.0010816141730174422, 'timestamp': '2025-09-10 02:32:23.787580', 'step': 6373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.821922', 'step': 6373, 'epoch': 3} {'type': 'loss', 'content': 0.0015200666384771466, 'timestamp': '2025-09-10 02:32:23.823629', 'step': 6374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.852503', 'step': 6374, 'epoch': 3} {'type': 'loss', 'content': 0.003132024547085166, 'timestamp': '2025-09-10 02:32:23.854410', 'step': 6375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.883606', 'step': 6375, 'epoch': 3} {'type': 'loss', 'content': 0.0013345686020329595, 'timestamp': '2025-09-10 02:32:23.906930', 'step': 6376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.936352', 'step': 6376, 'epoch': 3} {'type': 'loss', 'content': 0.003480277955532074, 'timestamp': '2025-09-10 02:32:23.938298', 'step': 6377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.967827', 'step': 6377, 'epoch': 3} {'type': 'loss', 'content': 0.0006901667220517993, 'timestamp': '2025-09-10 02:32:23.969765', 'step': 6378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:23.998370', 'step': 6378, 'epoch': 3} {'type': 'loss', 'content': 0.0003840525168925524, 'timestamp': '2025-09-10 02:32:24.000437', 'step': 6379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.029704', 'step': 6379, 'epoch': 3} {'type': 'loss', 'content': 0.003375781001523137, 'timestamp': '2025-09-10 02:32:24.052864', 'step': 6380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.081911', 'step': 6380, 'epoch': 3} {'type': 'loss', 'content': 0.001060336478985846, 'timestamp': '2025-09-10 02:32:24.083641', 'step': 6381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.112372', 'step': 6381, 'epoch': 3} {'type': 'loss', 'content': 0.00025973698939196765, 'timestamp': '2025-09-10 02:32:24.114068', 'step': 6382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:24.142751', 'step': 6382, 'epoch': 3} {'type': 'loss', 'content': 0.0006213196320459247, 'timestamp': '2025-09-10 02:32:24.144467', 'step': 6383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:24.174226', 'step': 6383, 'epoch': 3} {'type': 'loss', 'content': 0.0005688367527909577, 'timestamp': '2025-09-10 02:32:24.197521', 'step': 6384, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:32:26.080357', 'step': 6384, 'epoch': 3} {'type': 'pplx', 'content': 2602046.62760713, 'timestamp': '2025-09-10 02:32:26.082134', 'step': 6384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.110040', 'step': 6384, 'epoch': 3} {'type': 'loss', 'content': 0.0007975572370924056, 'timestamp': '2025-09-10 02:32:26.111781', 'step': 6385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.141032', 'step': 6385, 'epoch': 3} {'type': 'loss', 'content': 0.032970402389764786, 'timestamp': '2025-09-10 02:32:26.142887', 'step': 6386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.171430', 'step': 6386, 'epoch': 3} {'type': 'loss', 'content': 0.00016916354070417583, 'timestamp': '2025-09-10 02:32:26.173215', 'step': 6387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.202098', 'step': 6387, 'epoch': 3} {'type': 'loss', 'content': 0.0001721300941426307, 'timestamp': '2025-09-10 02:32:26.225762', 'step': 6388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.254988', 'step': 6388, 'epoch': 3} {'type': 'loss', 'content': 0.00022461578191723675, 'timestamp': '2025-09-10 02:32:26.256926', 'step': 6389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.286080', 'step': 6389, 'epoch': 3} {'type': 'loss', 'content': 0.0004514531174208969, 'timestamp': '2025-09-10 02:32:26.291189', 'step': 6390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.322315', 'step': 6390, 'epoch': 3} {'type': 'loss', 'content': 0.0002696145966183394, 'timestamp': '2025-09-10 02:32:26.324143', 'step': 6391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.353218', 'step': 6391, 'epoch': 3} {'type': 'loss', 'content': 0.002055809134617448, 'timestamp': '2025-09-10 02:32:26.376654', 'step': 6392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.407933', 'step': 6392, 'epoch': 3} {'type': 'loss', 'content': 0.012479028664529324, 'timestamp': '2025-09-10 02:32:26.409645', 'step': 6393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.438377', 'step': 6393, 'epoch': 3} {'type': 'loss', 'content': 0.0008340683998540044, 'timestamp': '2025-09-10 02:32:26.440065', 'step': 6394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.469193', 'step': 6394, 'epoch': 3} {'type': 'loss', 'content': 0.0007035091402940452, 'timestamp': '2025-09-10 02:32:26.479478', 'step': 6395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.510827', 'step': 6395, 'epoch': 3} {'type': 'loss', 'content': 0.004726804792881012, 'timestamp': '2025-09-10 02:32:26.534378', 'step': 6396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.564634', 'step': 6396, 'epoch': 3} {'type': 'loss', 'content': 0.00065030058613047, 'timestamp': '2025-09-10 02:32:26.567185', 'step': 6397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.599118', 'step': 6397, 'epoch': 3} {'type': 'loss', 'content': 0.0008404856198467314, 'timestamp': '2025-09-10 02:32:26.600716', 'step': 6398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.632272', 'step': 6398, 'epoch': 3} {'type': 'loss', 'content': 0.024703437462449074, 'timestamp': '2025-09-10 02:32:26.634083', 'step': 6399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.662807', 'step': 6399, 'epoch': 3} {'type': 'loss', 'content': 0.027414944022893906, 'timestamp': '2025-09-10 02:32:26.686458', 'step': 6400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:26.715363', 'step': 6400, 'epoch': 3} {'type': 'loss', 'content': 0.0026188211049884558, 'timestamp': '2025-09-10 02:32:26.717884', 'step': 6401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.747095', 'step': 6401, 'epoch': 3} {'type': 'loss', 'content': 0.01582406833767891, 'timestamp': '2025-09-10 02:32:26.751325', 'step': 6402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.787459', 'step': 6402, 'epoch': 3} {'type': 'loss', 'content': 0.0018050411017611623, 'timestamp': '2025-09-10 02:32:26.789057', 'step': 6403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:26.827290', 'step': 6403, 'epoch': 3} {'type': 'loss', 'content': 0.00014450155140366405, 'timestamp': '2025-09-10 02:32:26.854654', 'step': 6404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.886558', 'step': 6404, 'epoch': 3} {'type': 'loss', 'content': 0.0009893247624859214, 'timestamp': '2025-09-10 02:32:26.888367', 'step': 6405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.917573', 'step': 6405, 'epoch': 3} {'type': 'loss', 'content': 0.05074020102620125, 'timestamp': '2025-09-10 02:32:26.919349', 'step': 6406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.948263', 'step': 6406, 'epoch': 3} {'type': 'loss', 'content': 0.0004242721770424396, 'timestamp': '2025-09-10 02:32:26.950250', 'step': 6407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:26.981601', 'step': 6407, 'epoch': 3} {'type': 'loss', 'content': 0.0032885505352169275, 'timestamp': '2025-09-10 02:32:27.010126', 'step': 6408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.039001', 'step': 6408, 'epoch': 3} {'type': 'loss', 'content': 0.00021194576402194798, 'timestamp': '2025-09-10 02:32:27.040915', 'step': 6409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.069641', 'step': 6409, 'epoch': 3} {'type': 'loss', 'content': 0.00020467361900955439, 'timestamp': '2025-09-10 02:32:27.073547', 'step': 6410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.103346', 'step': 6410, 'epoch': 3} {'type': 'loss', 'content': 0.0005671937251463532, 'timestamp': '2025-09-10 02:32:27.105326', 'step': 6411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:27.136431', 'step': 6411, 'epoch': 3} {'type': 'loss', 'content': 0.00018863029254134744, 'timestamp': '2025-09-10 02:32:27.159798', 'step': 6412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.189605', 'step': 6412, 'epoch': 3} {'type': 'loss', 'content': 0.001088503166101873, 'timestamp': '2025-09-10 02:32:27.191484', 'step': 6413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.222539', 'step': 6413, 'epoch': 3} {'type': 'loss', 'content': 0.022781027480959892, 'timestamp': '2025-09-10 02:32:27.224260', 'step': 6414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.256822', 'step': 6414, 'epoch': 3} {'type': 'loss', 'content': 0.00010405414650449529, 'timestamp': '2025-09-10 02:32:27.258526', 'step': 6415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.287476', 'step': 6415, 'epoch': 3} {'type': 'loss', 'content': 0.012186779640614986, 'timestamp': '2025-09-10 02:32:27.310809', 'step': 6416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.342880', 'step': 6416, 'epoch': 3} {'type': 'loss', 'content': 0.0024998204316943884, 'timestamp': '2025-09-10 02:32:27.344656', 'step': 6417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.374541', 'step': 6417, 'epoch': 3} {'type': 'loss', 'content': 0.0008467523148283362, 'timestamp': '2025-09-10 02:32:27.376279', 'step': 6418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.411390', 'step': 6418, 'epoch': 3} {'type': 'loss', 'content': 0.015074108727276325, 'timestamp': '2025-09-10 02:32:27.413530', 'step': 6419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.447029', 'step': 6419, 'epoch': 3} {'type': 'loss', 'content': 0.0007348424405790865, 'timestamp': '2025-09-10 02:32:27.470643', 'step': 6420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.506370', 'step': 6420, 'epoch': 3} {'type': 'loss', 'content': 0.00017972067871596664, 'timestamp': '2025-09-10 02:32:27.508356', 'step': 6421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:27.539217', 'step': 6421, 'epoch': 3} {'type': 'loss', 'content': 0.02017919160425663, 'timestamp': '2025-09-10 02:32:27.540995', 'step': 6422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.575634', 'step': 6422, 'epoch': 3} {'type': 'loss', 'content': 0.002349887741729617, 'timestamp': '2025-09-10 02:32:27.577425', 'step': 6423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.609793', 'step': 6423, 'epoch': 3} {'type': 'loss', 'content': 0.01225587073713541, 'timestamp': '2025-09-10 02:32:27.633422', 'step': 6424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.664460', 'step': 6424, 'epoch': 3} {'type': 'loss', 'content': 0.00065768783679232, 'timestamp': '2025-09-10 02:32:27.666521', 'step': 6425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.704778', 'step': 6425, 'epoch': 3} {'type': 'loss', 'content': 0.0003179568739142269, 'timestamp': '2025-09-10 02:32:27.706474', 'step': 6426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.744842', 'step': 6426, 'epoch': 3} {'type': 'loss', 'content': 0.0015685457037761807, 'timestamp': '2025-09-10 02:32:27.746561', 'step': 6427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.780899', 'step': 6427, 'epoch': 3} {'type': 'loss', 'content': 0.0005152070079930127, 'timestamp': '2025-09-10 02:32:27.804217', 'step': 6428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:27.832914', 'step': 6428, 'epoch': 3} {'type': 'loss', 'content': 0.0001408977113896981, 'timestamp': '2025-09-10 02:32:27.834813', 'step': 6429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.863907', 'step': 6429, 'epoch': 3} {'type': 'loss', 'content': 0.002704629208892584, 'timestamp': '2025-09-10 02:32:27.865720', 'step': 6430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.894410', 'step': 6430, 'epoch': 3} {'type': 'loss', 'content': 0.00012314648483879864, 'timestamp': '2025-09-10 02:32:27.896345', 'step': 6431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:27.925444', 'step': 6431, 'epoch': 3} {'type': 'loss', 'content': 0.00018916084081865847, 'timestamp': '2025-09-10 02:32:27.949052', 'step': 6432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:27.978103', 'step': 6432, 'epoch': 3} {'type': 'loss', 'content': 0.0003389800258446485, 'timestamp': '2025-09-10 02:32:27.979804', 'step': 6433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.008486', 'step': 6433, 'epoch': 3} {'type': 'loss', 'content': 0.0007000124314799905, 'timestamp': '2025-09-10 02:32:28.010596', 'step': 6434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.041613', 'step': 6434, 'epoch': 3} {'type': 'loss', 'content': 0.0007533311145380139, 'timestamp': '2025-09-10 02:32:28.043374', 'step': 6435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-10 02:32:28.072405', 'step': 6435, 'epoch': 3} {'type': 'loss', 'content': 0.0007968792924657464, 'timestamp': '2025-09-10 02:32:28.095919', 'step': 6436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:28.124707', 'step': 6436, 'epoch': 3} {'type': 'loss', 'content': 0.024879014119505882, 'timestamp': '2025-09-10 02:32:28.126523', 'step': 6437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.155148', 'step': 6437, 'epoch': 3} {'type': 'loss', 'content': 0.00015964091289788485, 'timestamp': '2025-09-10 02:32:28.157015', 'step': 6438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.185887', 'step': 6438, 'epoch': 3} {'type': 'loss', 'content': 0.004214493092149496, 'timestamp': '2025-09-10 02:32:28.187736', 'step': 6439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.216726', 'step': 6439, 'epoch': 3} {'type': 'loss', 'content': 0.0002616394485812634, 'timestamp': '2025-09-10 02:32:28.239809', 'step': 6440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.269085', 'step': 6440, 'epoch': 3} {'type': 'loss', 'content': 0.0021432521753013134, 'timestamp': '2025-09-10 02:32:28.271027', 'step': 6441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.300758', 'step': 6441, 'epoch': 3} {'type': 'loss', 'content': 0.0003029382205568254, 'timestamp': '2025-09-10 02:32:28.302630', 'step': 6442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.331086', 'step': 6442, 'epoch': 3} {'type': 'loss', 'content': 0.009857903234660625, 'timestamp': '2025-09-10 02:32:28.333089', 'step': 6443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.362184', 'step': 6443, 'epoch': 3} {'type': 'loss', 'content': 0.0071450648829340935, 'timestamp': '2025-09-10 02:32:28.385443', 'step': 6444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:28.415428', 'step': 6444, 'epoch': 3} {'type': 'loss', 'content': 0.046203065663576126, 'timestamp': '2025-09-10 02:32:28.417207', 'step': 6445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:28.446450', 'step': 6445, 'epoch': 3} {'type': 'loss', 'content': 0.00039324097451753914, 'timestamp': '2025-09-10 02:32:28.448320', 'step': 6446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.477751', 'step': 6446, 'epoch': 3} {'type': 'loss', 'content': 0.0001980369124794379, 'timestamp': '2025-09-10 02:32:28.479467', 'step': 6447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.508459', 'step': 6447, 'epoch': 3} {'type': 'loss', 'content': 0.00044665136374533176, 'timestamp': '2025-09-10 02:32:28.531909', 'step': 6448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.561059', 'step': 6448, 'epoch': 3} {'type': 'loss', 'content': 0.00021955862757749856, 'timestamp': '2025-09-10 02:32:28.563011', 'step': 6449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:28.592061', 'step': 6449, 'epoch': 3} {'type': 'loss', 'content': 0.028430433943867683, 'timestamp': '2025-09-10 02:32:28.593655', 'step': 6450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:28.622758', 'step': 6450, 'epoch': 3} {'type': 'loss', 'content': 0.0043237460777163506, 'timestamp': '2025-09-10 02:32:28.624667', 'step': 6451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.654233', 'step': 6451, 'epoch': 3} {'type': 'loss', 'content': 0.0006920182495377958, 'timestamp': '2025-09-10 02:32:28.677676', 'step': 6452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.707426', 'step': 6452, 'epoch': 3} {'type': 'loss', 'content': 0.0002523947914596647, 'timestamp': '2025-09-10 02:32:28.709393', 'step': 6453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.739242', 'step': 6453, 'epoch': 3} {'type': 'loss', 'content': 0.004871721845120192, 'timestamp': '2025-09-10 02:32:28.741467', 'step': 6454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.772034', 'step': 6454, 'epoch': 3} {'type': 'loss', 'content': 0.00031806857441551983, 'timestamp': '2025-09-10 02:32:28.773788', 'step': 6455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.807393', 'step': 6455, 'epoch': 3} {'type': 'loss', 'content': 0.00031787189072929323, 'timestamp': '2025-09-10 02:32:28.830702', 'step': 6456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.859662', 'step': 6456, 'epoch': 3} {'type': 'loss', 'content': 0.0002223090996267274, 'timestamp': '2025-09-10 02:32:28.861417', 'step': 6457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:28.890544', 'step': 6457, 'epoch': 3} {'type': 'loss', 'content': 0.01526606921106577, 'timestamp': '2025-09-10 02:32:28.892390', 'step': 6458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:28.920890', 'step': 6458, 'epoch': 3} {'type': 'loss', 'content': 0.00029888629796914756, 'timestamp': '2025-09-10 02:32:28.922848', 'step': 6459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:28.951455', 'step': 6459, 'epoch': 3} {'type': 'loss', 'content': 0.00015020419959910214, 'timestamp': '2025-09-10 02:32:28.974975', 'step': 6460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.003695', 'step': 6460, 'epoch': 3} {'type': 'loss', 'content': 0.000799665111117065, 'timestamp': '2025-09-10 02:32:29.005540', 'step': 6461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.034179', 'step': 6461, 'epoch': 3} {'type': 'loss', 'content': 0.019971687346696854, 'timestamp': '2025-09-10 02:32:29.035987', 'step': 6462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.065180', 'step': 6462, 'epoch': 3} {'type': 'loss', 'content': 0.0021395846270024776, 'timestamp': '2025-09-10 02:32:29.067000', 'step': 6463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.095920', 'step': 6463, 'epoch': 3} {'type': 'loss', 'content': 0.005740799009799957, 'timestamp': '2025-09-10 02:32:29.119107', 'step': 6464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.149274', 'step': 6464, 'epoch': 3} {'type': 'loss', 'content': 0.0009019740973599255, 'timestamp': '2025-09-10 02:32:29.151097', 'step': 6465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.181349', 'step': 6465, 'epoch': 3} {'type': 'loss', 'content': 0.0002621648891363293, 'timestamp': '2025-09-10 02:32:29.182790', 'step': 6466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:29.216922', 'step': 6466, 'epoch': 3} {'type': 'loss', 'content': 0.001667393953539431, 'timestamp': '2025-09-10 02:32:29.218598', 'step': 6467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.254355', 'step': 6467, 'epoch': 3} {'type': 'loss', 'content': 0.00014791438297834247, 'timestamp': '2025-09-10 02:32:29.277772', 'step': 6468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.308880', 'step': 6468, 'epoch': 3} {'type': 'loss', 'content': 0.00028856098651885986, 'timestamp': '2025-09-10 02:32:29.310965', 'step': 6469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.342171', 'step': 6469, 'epoch': 3} {'type': 'loss', 'content': 0.0004741291340906173, 'timestamp': '2025-09-10 02:32:29.343911', 'step': 6470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:29.374174', 'step': 6470, 'epoch': 3} {'type': 'loss', 'content': 0.00037020345916971564, 'timestamp': '2025-09-10 02:32:29.375910', 'step': 6471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.409523', 'step': 6471, 'epoch': 3} {'type': 'loss', 'content': 0.0003086226643063128, 'timestamp': '2025-09-10 02:32:29.432799', 'step': 6472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:29.464501', 'step': 6472, 'epoch': 3} {'type': 'loss', 'content': 0.00035341139300726354, 'timestamp': '2025-09-10 02:32:29.466338', 'step': 6473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.503123', 'step': 6473, 'epoch': 3} {'type': 'loss', 'content': 0.0007538163335993886, 'timestamp': '2025-09-10 02:32:29.505038', 'step': 6474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.535596', 'step': 6474, 'epoch': 3} {'type': 'loss', 'content': 0.00029482340323738754, 'timestamp': '2025-09-10 02:32:29.537350', 'step': 6475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.570351', 'step': 6475, 'epoch': 3} {'type': 'loss', 'content': 0.00020520342513918877, 'timestamp': '2025-09-10 02:32:29.593609', 'step': 6476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.624945', 'step': 6476, 'epoch': 3} {'type': 'loss', 'content': 0.0007698552799411118, 'timestamp': '2025-09-10 02:32:29.626829', 'step': 6477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.660422', 'step': 6477, 'epoch': 3} {'type': 'loss', 'content': 0.00027317609055899084, 'timestamp': '2025-09-10 02:32:29.662133', 'step': 6478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.699765', 'step': 6478, 'epoch': 3} {'type': 'loss', 'content': 0.00039539759745821357, 'timestamp': '2025-09-10 02:32:29.701609', 'step': 6479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.738272', 'step': 6479, 'epoch': 3} {'type': 'loss', 'content': 0.0001573774206917733, 'timestamp': '2025-09-10 02:32:29.761794', 'step': 6480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.798776', 'step': 6480, 'epoch': 3} {'type': 'loss', 'content': 0.0009613978327251971, 'timestamp': '2025-09-10 02:32:29.800680', 'step': 6481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:29.829808', 'step': 6481, 'epoch': 3} {'type': 'loss', 'content': 0.00011410355364205316, 'timestamp': '2025-09-10 02:32:29.831698', 'step': 6482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.861112', 'step': 6482, 'epoch': 3} {'type': 'loss', 'content': 0.009019218385219574, 'timestamp': '2025-09-10 02:32:29.863074', 'step': 6483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:29.892711', 'step': 6483, 'epoch': 3} {'type': 'loss', 'content': 0.0010529905557632446, 'timestamp': '2025-09-10 02:32:29.916201', 'step': 6484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.945707', 'step': 6484, 'epoch': 3} {'type': 'loss', 'content': 0.0009112291736528277, 'timestamp': '2025-09-10 02:32:29.947684', 'step': 6485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:29.976565', 'step': 6485, 'epoch': 3} {'type': 'loss', 'content': 0.0009821915300562978, 'timestamp': '2025-09-10 02:32:29.978294', 'step': 6486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.007599', 'step': 6486, 'epoch': 3} {'type': 'loss', 'content': 0.0005452854675240815, 'timestamp': '2025-09-10 02:32:30.009395', 'step': 6487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.038441', 'step': 6487, 'epoch': 3} {'type': 'loss', 'content': 0.0002243387425551191, 'timestamp': '2025-09-10 02:32:30.061890', 'step': 6488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.091022', 'step': 6488, 'epoch': 3} {'type': 'loss', 'content': 0.004632903728634119, 'timestamp': '2025-09-10 02:32:30.092822', 'step': 6489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.121640', 'step': 6489, 'epoch': 3} {'type': 'loss', 'content': 0.0002741267380770296, 'timestamp': '2025-09-10 02:32:30.123853', 'step': 6490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.153171', 'step': 6490, 'epoch': 3} {'type': 'loss', 'content': 0.002974990289658308, 'timestamp': '2025-09-10 02:32:30.155007', 'step': 6491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.183634', 'step': 6491, 'epoch': 3} {'type': 'loss', 'content': 0.0008022545953281224, 'timestamp': '2025-09-10 02:32:30.207133', 'step': 6492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.236240', 'step': 6492, 'epoch': 3} {'type': 'loss', 'content': 0.001183294109068811, 'timestamp': '2025-09-10 02:32:30.238045', 'step': 6493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.267032', 'step': 6493, 'epoch': 3} {'type': 'loss', 'content': 0.00015620666090399027, 'timestamp': '2025-09-10 02:32:30.268715', 'step': 6494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:30.297612', 'step': 6494, 'epoch': 3} {'type': 'loss', 'content': 0.001702985493466258, 'timestamp': '2025-09-10 02:32:30.299391', 'step': 6495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.328773', 'step': 6495, 'epoch': 3} {'type': 'loss', 'content': 0.009991460479795933, 'timestamp': '2025-09-10 02:32:30.353644', 'step': 6496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.382997', 'step': 6496, 'epoch': 3} {'type': 'loss', 'content': 0.0010712883668020368, 'timestamp': '2025-09-10 02:32:30.384842', 'step': 6497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.413409', 'step': 6497, 'epoch': 3} {'type': 'loss', 'content': 0.0003185864188708365, 'timestamp': '2025-09-10 02:32:30.415178', 'step': 6498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:30.444350', 'step': 6498, 'epoch': 3} {'type': 'loss', 'content': 0.03873638063669205, 'timestamp': '2025-09-10 02:32:30.446077', 'step': 6499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:30.474911', 'step': 6499, 'epoch': 3} {'type': 'loss', 'content': 0.01748695597052574, 'timestamp': '2025-09-10 02:32:30.498343', 'step': 6500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-10 02:32:35.848016', 'step': 6500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:35.882307', 'step': 6500, 'epoch': 3} {'type': 'loss', 'content': 0.0004379930905997753, 'timestamp': '2025-09-10 02:32:35.884426', 'step': 6501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:35.914425', 'step': 6501, 'epoch': 3} {'type': 'loss', 'content': 0.00020205129112582654, 'timestamp': '2025-09-10 02:32:35.916494', 'step': 6502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:35.946040', 'step': 6502, 'epoch': 3} {'type': 'loss', 'content': 0.00021745593403466046, 'timestamp': '2025-09-10 02:32:35.948076', 'step': 6503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:35.977988', 'step': 6503, 'epoch': 3} {'type': 'loss', 'content': 0.054765503853559494, 'timestamp': '2025-09-10 02:32:36.001826', 'step': 6504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.032374', 'step': 6504, 'epoch': 3} {'type': 'loss', 'content': 0.0003472270618658513, 'timestamp': '2025-09-10 02:32:36.035343', 'step': 6505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.065999', 'step': 6505, 'epoch': 3} {'type': 'loss', 'content': 0.0031078639440238476, 'timestamp': '2025-09-10 02:32:36.067831', 'step': 6506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.102429', 'step': 6506, 'epoch': 3} {'type': 'loss', 'content': 0.00019415069255046546, 'timestamp': '2025-09-10 02:32:36.104078', 'step': 6507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.132983', 'step': 6507, 'epoch': 3} {'type': 'loss', 'content': 0.00020654825493693352, 'timestamp': '2025-09-10 02:32:36.156577', 'step': 6508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.185567', 'step': 6508, 'epoch': 3} {'type': 'loss', 'content': 0.0001644888980081305, 'timestamp': '2025-09-10 02:32:36.187513', 'step': 6509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.216402', 'step': 6509, 'epoch': 3} {'type': 'loss', 'content': 0.00035848282277584076, 'timestamp': '2025-09-10 02:32:36.218956', 'step': 6510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.248694', 'step': 6510, 'epoch': 3} {'type': 'loss', 'content': 0.0015532153192907572, 'timestamp': '2025-09-10 02:32:36.250663', 'step': 6511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.279957', 'step': 6511, 'epoch': 3} {'type': 'loss', 'content': 0.00032827811082825065, 'timestamp': '2025-09-10 02:32:36.304049', 'step': 6512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.338393', 'step': 6512, 'epoch': 3} {'type': 'loss', 'content': 0.0003724382841028273, 'timestamp': '2025-09-10 02:32:36.340184', 'step': 6513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.369704', 'step': 6513, 'epoch': 3} {'type': 'loss', 'content': 0.000916356104426086, 'timestamp': '2025-09-10 02:32:36.371431', 'step': 6514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.400696', 'step': 6514, 'epoch': 3} {'type': 'loss', 'content': 0.00069367018295452, 'timestamp': '2025-09-10 02:32:36.403946', 'step': 6515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.434251', 'step': 6515, 'epoch': 3} {'type': 'loss', 'content': 0.00020402041263878345, 'timestamp': '2025-09-10 02:32:36.457611', 'step': 6516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.486949', 'step': 6516, 'epoch': 3} {'type': 'loss', 'content': 0.00026494014309719205, 'timestamp': '2025-09-10 02:32:36.488841', 'step': 6517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:36.518078', 'step': 6517, 'epoch': 3} {'type': 'loss', 'content': 0.0015278218779712915, 'timestamp': '2025-09-10 02:32:36.519925', 'step': 6518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.548784', 'step': 6518, 'epoch': 3} {'type': 'loss', 'content': 0.00026909203734248877, 'timestamp': '2025-09-10 02:32:36.550848', 'step': 6519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.580220', 'step': 6519, 'epoch': 3} {'type': 'loss', 'content': 0.0050692250952124596, 'timestamp': '2025-09-10 02:32:36.603916', 'step': 6520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.633367', 'step': 6520, 'epoch': 3} {'type': 'loss', 'content': 0.008903230540454388, 'timestamp': '2025-09-10 02:32:36.635194', 'step': 6521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.664694', 'step': 6521, 'epoch': 3} {'type': 'loss', 'content': 0.005301364231854677, 'timestamp': '2025-09-10 02:32:36.666424', 'step': 6522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.696205', 'step': 6522, 'epoch': 3} {'type': 'loss', 'content': 0.001449521048925817, 'timestamp': '2025-09-10 02:32:36.698179', 'step': 6523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.728243', 'step': 6523, 'epoch': 3} {'type': 'loss', 'content': 0.0001892952568596229, 'timestamp': '2025-09-10 02:32:36.751747', 'step': 6524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.784415', 'step': 6524, 'epoch': 3} {'type': 'loss', 'content': 0.000138781892019324, 'timestamp': '2025-09-10 02:32:36.786441', 'step': 6525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.820062', 'step': 6525, 'epoch': 3} {'type': 'loss', 'content': 0.0004280885332264006, 'timestamp': '2025-09-10 02:32:36.822254', 'step': 6526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.851819', 'step': 6526, 'epoch': 3} {'type': 'loss', 'content': 0.00021600235777441412, 'timestamp': '2025-09-10 02:32:36.853822', 'step': 6527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.882890', 'step': 6527, 'epoch': 3} {'type': 'loss', 'content': 0.00652282265946269, 'timestamp': '2025-09-10 02:32:36.906555', 'step': 6528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.935891', 'step': 6528, 'epoch': 3} {'type': 'loss', 'content': 0.0005161373992450535, 'timestamp': '2025-09-10 02:32:36.937785', 'step': 6529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:36.966574', 'step': 6529, 'epoch': 3} {'type': 'loss', 'content': 0.0001188876703963615, 'timestamp': '2025-09-10 02:32:36.968460', 'step': 6530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:36.997903', 'step': 6530, 'epoch': 3} {'type': 'loss', 'content': 0.0005654821288771927, 'timestamp': '2025-09-10 02:32:36.999944', 'step': 6531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:37.028839', 'step': 6531, 'epoch': 3} {'type': 'loss', 'content': 0.0016985258553177118, 'timestamp': '2025-09-10 02:32:37.052588', 'step': 6532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:37.082164', 'step': 6532, 'epoch': 3} {'type': 'loss', 'content': 0.05521092563867569, 'timestamp': '2025-09-10 02:32:37.083835', 'step': 6533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:37.112744', 'step': 6533, 'epoch': 3} {'type': 'loss', 'content': 0.0023533659987151623, 'timestamp': '2025-09-10 02:32:37.114501', 'step': 6534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:37.143504', 'step': 6534, 'epoch': 3} {'type': 'loss', 'content': 0.00022290900233201683, 'timestamp': '2025-09-10 02:32:37.145571', 'step': 6535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:37.176165', 'step': 6535, 'epoch': 3} {'type': 'loss', 'content': 0.00036111968802288175, 'timestamp': '2025-09-10 02:32:37.199493', 'step': 6536, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:32:39.104896', 'step': 6536, 'epoch': 3} {'type': 'pplx', 'content': 2442277.5126239713, 'timestamp': '2025-09-10 02:32:39.106861', 'step': 6536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.135396', 'step': 6536, 'epoch': 3} {'type': 'loss', 'content': 9.491186210652813e-05, 'timestamp': '2025-09-10 02:32:39.137280', 'step': 6537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:39.166639', 'step': 6537, 'epoch': 3} {'type': 'loss', 'content': 0.002770493272691965, 'timestamp': '2025-09-10 02:32:39.168821', 'step': 6538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.197696', 'step': 6538, 'epoch': 3} {'type': 'loss', 'content': 9.687560668680817e-05, 'timestamp': '2025-09-10 02:32:39.199608', 'step': 6539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:39.228426', 'step': 6539, 'epoch': 3} {'type': 'loss', 'content': 0.00010829438542714342, 'timestamp': '2025-09-10 02:32:39.251928', 'step': 6540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:39.281801', 'step': 6540, 'epoch': 3} {'type': 'loss', 'content': 9.18650912353769e-05, 'timestamp': '2025-09-10 02:32:39.283585', 'step': 6541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.313806', 'step': 6541, 'epoch': 3} {'type': 'loss', 'content': 0.021688321605324745, 'timestamp': '2025-09-10 02:32:39.315828', 'step': 6542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.348751', 'step': 6542, 'epoch': 3} {'type': 'loss', 'content': 0.00018935799016617239, 'timestamp': '2025-09-10 02:32:39.350479', 'step': 6543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.382829', 'step': 6543, 'epoch': 3} {'type': 'loss', 'content': 0.001111075864173472, 'timestamp': '2025-09-10 02:32:39.406216', 'step': 6544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:39.440121', 'step': 6544, 'epoch': 3} {'type': 'loss', 'content': 0.0008615261758677661, 'timestamp': '2025-09-10 02:32:39.441900', 'step': 6545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.475702', 'step': 6545, 'epoch': 3} {'type': 'loss', 'content': 0.0017964442959055305, 'timestamp': '2025-09-10 02:32:39.477680', 'step': 6546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:39.511839', 'step': 6546, 'epoch': 3} {'type': 'loss', 'content': 0.00037840628647245467, 'timestamp': '2025-09-10 02:32:39.514089', 'step': 6547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.545205', 'step': 6547, 'epoch': 3} {'type': 'loss', 'content': 0.0003863736055791378, 'timestamp': '2025-09-10 02:32:39.568621', 'step': 6548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.601634', 'step': 6548, 'epoch': 3} {'type': 'loss', 'content': 0.00015133467968553305, 'timestamp': '2025-09-10 02:32:39.603773', 'step': 6549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.637071', 'step': 6549, 'epoch': 3} {'type': 'loss', 'content': 0.0020984576549381018, 'timestamp': '2025-09-10 02:32:39.639094', 'step': 6550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.671372', 'step': 6550, 'epoch': 3} {'type': 'loss', 'content': 9.021971345646307e-05, 'timestamp': '2025-09-10 02:32:39.673127', 'step': 6551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:39.710146', 'step': 6551, 'epoch': 3} {'type': 'loss', 'content': 0.0017723581986501813, 'timestamp': '2025-09-10 02:32:39.733782', 'step': 6552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.768078', 'step': 6552, 'epoch': 3} {'type': 'loss', 'content': 0.019412502646446228, 'timestamp': '2025-09-10 02:32:39.769980', 'step': 6553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:39.808623', 'step': 6553, 'epoch': 3} {'type': 'loss', 'content': 0.00012914600665681064, 'timestamp': '2025-09-10 02:32:39.810616', 'step': 6554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.840018', 'step': 6554, 'epoch': 3} {'type': 'loss', 'content': 0.024290457367897034, 'timestamp': '2025-09-10 02:32:39.841957', 'step': 6555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.870807', 'step': 6555, 'epoch': 3} {'type': 'loss', 'content': 0.0001335710840066895, 'timestamp': '2025-09-10 02:32:39.894147', 'step': 6556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.923489', 'step': 6556, 'epoch': 3} {'type': 'loss', 'content': 0.0010500989155843854, 'timestamp': '2025-09-10 02:32:39.925440', 'step': 6557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.954724', 'step': 6557, 'epoch': 3} {'type': 'loss', 'content': 0.00023776150192134082, 'timestamp': '2025-09-10 02:32:39.956591', 'step': 6558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:39.985615', 'step': 6558, 'epoch': 3} {'type': 'loss', 'content': 0.002251929370686412, 'timestamp': '2025-09-10 02:32:39.987556', 'step': 6559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.016617', 'step': 6559, 'epoch': 3} {'type': 'loss', 'content': 0.005498126614838839, 'timestamp': '2025-09-10 02:32:40.039894', 'step': 6560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:40.069334', 'step': 6560, 'epoch': 3} {'type': 'loss', 'content': 0.00023501995019614697, 'timestamp': '2025-09-10 02:32:40.071051', 'step': 6561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:40.099980', 'step': 6561, 'epoch': 3} {'type': 'loss', 'content': 0.02420176938176155, 'timestamp': '2025-09-10 02:32:40.101928', 'step': 6562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.131025', 'step': 6562, 'epoch': 3} {'type': 'loss', 'content': 0.00041470900760032237, 'timestamp': '2025-09-10 02:32:40.133015', 'step': 6563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.162061', 'step': 6563, 'epoch': 3} {'type': 'loss', 'content': 0.04961610585451126, 'timestamp': '2025-09-10 02:32:40.185165', 'step': 6564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:40.214342', 'step': 6564, 'epoch': 3} {'type': 'loss', 'content': 0.0017416634364053607, 'timestamp': '2025-09-10 02:32:40.216253', 'step': 6565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.245846', 'step': 6565, 'epoch': 3} {'type': 'loss', 'content': 0.0006035008700564504, 'timestamp': '2025-09-10 02:32:40.247768', 'step': 6566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.276591', 'step': 6566, 'epoch': 3} {'type': 'loss', 'content': 0.0019087980035692453, 'timestamp': '2025-09-10 02:32:40.278495', 'step': 6567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:40.307859', 'step': 6567, 'epoch': 3} {'type': 'loss', 'content': 0.00014120301057118922, 'timestamp': '2025-09-10 02:32:40.331062', 'step': 6568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:40.360052', 'step': 6568, 'epoch': 3} {'type': 'loss', 'content': 0.00040765447192825377, 'timestamp': '2025-09-10 02:32:40.361603', 'step': 6569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.390390', 'step': 6569, 'epoch': 3} {'type': 'loss', 'content': 0.014113292098045349, 'timestamp': '2025-09-10 02:32:40.392163', 'step': 6570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.421926', 'step': 6570, 'epoch': 3} {'type': 'loss', 'content': 0.0002716171438805759, 'timestamp': '2025-09-10 02:32:40.423906', 'step': 6571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.453222', 'step': 6571, 'epoch': 3} {'type': 'loss', 'content': 0.002028391696512699, 'timestamp': '2025-09-10 02:32:40.476530', 'step': 6572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.506084', 'step': 6572, 'epoch': 3} {'type': 'loss', 'content': 0.0002613048709463328, 'timestamp': '2025-09-10 02:32:40.508009', 'step': 6573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.537230', 'step': 6573, 'epoch': 3} {'type': 'loss', 'content': 0.00010381250467617065, 'timestamp': '2025-09-10 02:32:40.539128', 'step': 6574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.568099', 'step': 6574, 'epoch': 3} {'type': 'loss', 'content': 0.01973879523575306, 'timestamp': '2025-09-10 02:32:40.569690', 'step': 6575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.598471', 'step': 6575, 'epoch': 3} {'type': 'loss', 'content': 0.00020547016174532473, 'timestamp': '2025-09-10 02:32:40.622083', 'step': 6576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.651283', 'step': 6576, 'epoch': 3} {'type': 'loss', 'content': 0.004909324925392866, 'timestamp': '2025-09-10 02:32:40.653336', 'step': 6577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.682739', 'step': 6577, 'epoch': 3} {'type': 'loss', 'content': 0.00044479951611720026, 'timestamp': '2025-09-10 02:32:40.684392', 'step': 6578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:40.712954', 'step': 6578, 'epoch': 3} {'type': 'loss', 'content': 0.00040925919893197715, 'timestamp': '2025-09-10 02:32:40.714889', 'step': 6579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.744283', 'step': 6579, 'epoch': 3} {'type': 'loss', 'content': 7.322274177568033e-05, 'timestamp': '2025-09-10 02:32:40.767515', 'step': 6580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.801556', 'step': 6580, 'epoch': 3} {'type': 'loss', 'content': 0.00012532089021988213, 'timestamp': '2025-09-10 02:32:40.803421', 'step': 6581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:40.832604', 'step': 6581, 'epoch': 3} {'type': 'loss', 'content': 0.0003361101262271404, 'timestamp': '2025-09-10 02:32:40.834532', 'step': 6582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:40.863787', 'step': 6582, 'epoch': 3} {'type': 'loss', 'content': 0.05063135549426079, 'timestamp': '2025-09-10 02:32:40.865402', 'step': 6583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:40.895272', 'step': 6583, 'epoch': 3} {'type': 'loss', 'content': 0.0002015512145590037, 'timestamp': '2025-09-10 02:32:40.918681', 'step': 6584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:40.947702', 'step': 6584, 'epoch': 3} {'type': 'loss', 'content': 0.0013774631079286337, 'timestamp': '2025-09-10 02:32:40.949864', 'step': 6585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:40.978564', 'step': 6585, 'epoch': 3} {'type': 'loss', 'content': 0.0018797398079186678, 'timestamp': '2025-09-10 02:32:40.980364', 'step': 6586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.009098', 'step': 6586, 'epoch': 3} {'type': 'loss', 'content': 0.0001342600298812613, 'timestamp': '2025-09-10 02:32:41.010843', 'step': 6587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.039672', 'step': 6587, 'epoch': 3} {'type': 'loss', 'content': 0.00042086574831046164, 'timestamp': '2025-09-10 02:32:41.062935', 'step': 6588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:41.091727', 'step': 6588, 'epoch': 3} {'type': 'loss', 'content': 0.0003665420808829367, 'timestamp': '2025-09-10 02:32:41.093733', 'step': 6589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.123185', 'step': 6589, 'epoch': 3} {'type': 'loss', 'content': 0.00016097302432172, 'timestamp': '2025-09-10 02:32:41.125172', 'step': 6590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.154086', 'step': 6590, 'epoch': 3} {'type': 'loss', 'content': 0.0009196820901706815, 'timestamp': '2025-09-10 02:32:41.156246', 'step': 6591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.186820', 'step': 6591, 'epoch': 3} {'type': 'loss', 'content': 0.02848992869257927, 'timestamp': '2025-09-10 02:32:41.210264', 'step': 6592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.243348', 'step': 6592, 'epoch': 3} {'type': 'loss', 'content': 0.00047538583748973906, 'timestamp': '2025-09-10 02:32:41.245298', 'step': 6593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:41.277785', 'step': 6593, 'epoch': 3} {'type': 'loss', 'content': 0.0005702796042896807, 'timestamp': '2025-09-10 02:32:41.279570', 'step': 6594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.311696', 'step': 6594, 'epoch': 3} {'type': 'loss', 'content': 0.0009750212775543332, 'timestamp': '2025-09-10 02:32:41.313599', 'step': 6595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.346892', 'step': 6595, 'epoch': 3} {'type': 'loss', 'content': 0.00544664915651083, 'timestamp': '2025-09-10 02:32:41.370439', 'step': 6596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.405339', 'step': 6596, 'epoch': 3} {'type': 'loss', 'content': 0.0012348754098638892, 'timestamp': '2025-09-10 02:32:41.407218', 'step': 6597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:41.440258', 'step': 6597, 'epoch': 3} {'type': 'loss', 'content': 0.00028111092979088426, 'timestamp': '2025-09-10 02:32:41.442254', 'step': 6598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:41.476126', 'step': 6598, 'epoch': 3} {'type': 'loss', 'content': 0.0005132206133566797, 'timestamp': '2025-09-10 02:32:41.478297', 'step': 6599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.512238', 'step': 6599, 'epoch': 3} {'type': 'loss', 'content': 0.0005630544037558138, 'timestamp': '2025-09-10 02:32:41.535573', 'step': 6600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.568207', 'step': 6600, 'epoch': 3} {'type': 'loss', 'content': 0.00023868770222179592, 'timestamp': '2025-09-10 02:32:41.569980', 'step': 6601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.603702', 'step': 6601, 'epoch': 3} {'type': 'loss', 'content': 0.0003321226977277547, 'timestamp': '2025-09-10 02:32:41.605795', 'step': 6602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:41.639179', 'step': 6602, 'epoch': 3} {'type': 'loss', 'content': 0.000865118287038058, 'timestamp': '2025-09-10 02:32:41.641142', 'step': 6603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.674160', 'step': 6603, 'epoch': 3} {'type': 'loss', 'content': 0.0004023853980470449, 'timestamp': '2025-09-10 02:32:41.697349', 'step': 6604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:41.735098', 'step': 6604, 'epoch': 3} {'type': 'loss', 'content': 0.0004497812769841403, 'timestamp': '2025-09-10 02:32:41.737087', 'step': 6605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:41.774016', 'step': 6605, 'epoch': 3} {'type': 'loss', 'content': 0.0006717675132676959, 'timestamp': '2025-09-10 02:32:41.775665', 'step': 6606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.813970', 'step': 6606, 'epoch': 3} {'type': 'loss', 'content': 0.0006070249364711344, 'timestamp': '2025-09-10 02:32:41.815776', 'step': 6607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.844643', 'step': 6607, 'epoch': 3} {'type': 'loss', 'content': 0.0029559044633060694, 'timestamp': '2025-09-10 02:32:41.868019', 'step': 6608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:41.897415', 'step': 6608, 'epoch': 3} {'type': 'loss', 'content': 0.0004757153510581702, 'timestamp': '2025-09-10 02:32:41.899334', 'step': 6609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.928411', 'step': 6609, 'epoch': 3} {'type': 'loss', 'content': 0.0022335194516927004, 'timestamp': '2025-09-10 02:32:41.930182', 'step': 6610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.959185', 'step': 6610, 'epoch': 3} {'type': 'loss', 'content': 0.0005463598063215613, 'timestamp': '2025-09-10 02:32:41.961073', 'step': 6611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:41.989945', 'step': 6611, 'epoch': 3} {'type': 'loss', 'content': 0.0003097263688687235, 'timestamp': '2025-09-10 02:32:42.013333', 'step': 6612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:42.042882', 'step': 6612, 'epoch': 3} {'type': 'loss', 'content': 0.0001371066173305735, 'timestamp': '2025-09-10 02:32:42.044964', 'step': 6613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.073937', 'step': 6613, 'epoch': 3} {'type': 'loss', 'content': 0.0014303690986707807, 'timestamp': '2025-09-10 02:32:42.075792', 'step': 6614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.105205', 'step': 6614, 'epoch': 3} {'type': 'loss', 'content': 0.00017707170627545565, 'timestamp': '2025-09-10 02:32:42.107216', 'step': 6615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.135925', 'step': 6615, 'epoch': 3} {'type': 'loss', 'content': 0.0003356249653734267, 'timestamp': '2025-09-10 02:32:42.159272', 'step': 6616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.188698', 'step': 6616, 'epoch': 3} {'type': 'loss', 'content': 0.00032328421366401017, 'timestamp': '2025-09-10 02:32:42.190854', 'step': 6617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.220122', 'step': 6617, 'epoch': 3} {'type': 'loss', 'content': 9.965641220333055e-05, 'timestamp': '2025-09-10 02:32:42.221945', 'step': 6618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.250890', 'step': 6618, 'epoch': 3} {'type': 'loss', 'content': 0.0001957298518391326, 'timestamp': '2025-09-10 02:32:42.252947', 'step': 6619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.281997', 'step': 6619, 'epoch': 3} {'type': 'loss', 'content': 0.013988674618303776, 'timestamp': '2025-09-10 02:32:42.305367', 'step': 6620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.334830', 'step': 6620, 'epoch': 3} {'type': 'loss', 'content': 0.010370716452598572, 'timestamp': '2025-09-10 02:32:42.336897', 'step': 6621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.365498', 'step': 6621, 'epoch': 3} {'type': 'loss', 'content': 0.004063849337399006, 'timestamp': '2025-09-10 02:32:42.367526', 'step': 6622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.396707', 'step': 6622, 'epoch': 3} {'type': 'loss', 'content': 0.007541103754192591, 'timestamp': '2025-09-10 02:32:42.398476', 'step': 6623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:42.427639', 'step': 6623, 'epoch': 3} {'type': 'loss', 'content': 0.0002139332063961774, 'timestamp': '2025-09-10 02:32:42.451085', 'step': 6624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.480225', 'step': 6624, 'epoch': 3} {'type': 'loss', 'content': 0.00032632803777232766, 'timestamp': '2025-09-10 02:32:42.481878', 'step': 6625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.510973', 'step': 6625, 'epoch': 3} {'type': 'loss', 'content': 0.004863189999014139, 'timestamp': '2025-09-10 02:32:42.513305', 'step': 6626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.542756', 'step': 6626, 'epoch': 3} {'type': 'loss', 'content': 0.0023307842202484608, 'timestamp': '2025-09-10 02:32:42.545066', 'step': 6627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.574190', 'step': 6627, 'epoch': 3} {'type': 'loss', 'content': 0.002439223462715745, 'timestamp': '2025-09-10 02:32:42.598154', 'step': 6628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.627987', 'step': 6628, 'epoch': 3} {'type': 'loss', 'content': 0.0015195738524198532, 'timestamp': '2025-09-10 02:32:42.630059', 'step': 6629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.660475', 'step': 6629, 'epoch': 3} {'type': 'loss', 'content': 0.0012575994478538632, 'timestamp': '2025-09-10 02:32:42.662298', 'step': 6630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:42.691610', 'step': 6630, 'epoch': 3} {'type': 'loss', 'content': 0.0001379108871333301, 'timestamp': '2025-09-10 02:32:42.693851', 'step': 6631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.723295', 'step': 6631, 'epoch': 3} {'type': 'loss', 'content': 0.0009422508883289993, 'timestamp': '2025-09-10 02:32:42.746680', 'step': 6632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.778011', 'step': 6632, 'epoch': 3} {'type': 'loss', 'content': 0.00043333551730029285, 'timestamp': '2025-09-10 02:32:42.779952', 'step': 6633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.815015', 'step': 6633, 'epoch': 3} {'type': 'loss', 'content': 0.0011779938358813524, 'timestamp': '2025-09-10 02:32:42.817019', 'step': 6634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.846508', 'step': 6634, 'epoch': 3} {'type': 'loss', 'content': 0.0008157030097208917, 'timestamp': '2025-09-10 02:32:42.848852', 'step': 6635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.877699', 'step': 6635, 'epoch': 3} {'type': 'loss', 'content': 0.0035045649856328964, 'timestamp': '2025-09-10 02:32:42.901480', 'step': 6636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.930902', 'step': 6636, 'epoch': 3} {'type': 'loss', 'content': 0.0007069081766530871, 'timestamp': '2025-09-10 02:32:42.933083', 'step': 6637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:42.962139', 'step': 6637, 'epoch': 3} {'type': 'loss', 'content': 0.00015712481399532408, 'timestamp': '2025-09-10 02:32:42.964033', 'step': 6638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:42.992991', 'step': 6638, 'epoch': 3} {'type': 'loss', 'content': 0.000399542972445488, 'timestamp': '2025-09-10 02:32:42.994806', 'step': 6639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.024557', 'step': 6639, 'epoch': 3} {'type': 'loss', 'content': 0.0020209425128996372, 'timestamp': '2025-09-10 02:32:43.048111', 'step': 6640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.076830', 'step': 6640, 'epoch': 3} {'type': 'loss', 'content': 0.0005273325950838625, 'timestamp': '2025-09-10 02:32:43.078963', 'step': 6641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.107792', 'step': 6641, 'epoch': 3} {'type': 'loss', 'content': 0.0003790934570133686, 'timestamp': '2025-09-10 02:32:43.109667', 'step': 6642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.138547', 'step': 6642, 'epoch': 3} {'type': 'loss', 'content': 0.00045166281051933765, 'timestamp': '2025-09-10 02:32:43.140675', 'step': 6643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.169541', 'step': 6643, 'epoch': 3} {'type': 'loss', 'content': 0.0004468052356969565, 'timestamp': '2025-09-10 02:32:43.192909', 'step': 6644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.222133', 'step': 6644, 'epoch': 3} {'type': 'loss', 'content': 0.0004113587492611259, 'timestamp': '2025-09-10 02:32:43.224060', 'step': 6645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.257499', 'step': 6645, 'epoch': 3} {'type': 'loss', 'content': 0.001634429907426238, 'timestamp': '2025-09-10 02:32:43.259508', 'step': 6646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.289281', 'step': 6646, 'epoch': 3} {'type': 'loss', 'content': 0.00041421657078899443, 'timestamp': '2025-09-10 02:32:43.291259', 'step': 6647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:43.320480', 'step': 6647, 'epoch': 3} {'type': 'loss', 'content': 0.0002726706152316183, 'timestamp': '2025-09-10 02:32:43.344449', 'step': 6648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.375906', 'step': 6648, 'epoch': 3} {'type': 'loss', 'content': 0.003396217245608568, 'timestamp': '2025-09-10 02:32:43.378085', 'step': 6649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.412597', 'step': 6649, 'epoch': 3} {'type': 'loss', 'content': 0.0006525564240291715, 'timestamp': '2025-09-10 02:32:43.414876', 'step': 6650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.446559', 'step': 6650, 'epoch': 3} {'type': 'loss', 'content': 0.05705555900931358, 'timestamp': '2025-09-10 02:32:43.448604', 'step': 6651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.482261', 'step': 6651, 'epoch': 3} {'type': 'loss', 'content': 0.00020800225320272148, 'timestamp': '2025-09-10 02:32:43.505680', 'step': 6652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.536776', 'step': 6652, 'epoch': 3} {'type': 'loss', 'content': 0.00018046969489660114, 'timestamp': '2025-09-10 02:32:43.538675', 'step': 6653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:43.571848', 'step': 6653, 'epoch': 3} {'type': 'loss', 'content': 0.0005861817044205964, 'timestamp': '2025-09-10 02:32:43.574026', 'step': 6654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.607622', 'step': 6654, 'epoch': 3} {'type': 'loss', 'content': 0.0003875174734275788, 'timestamp': '2025-09-10 02:32:43.610103', 'step': 6655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.642519', 'step': 6655, 'epoch': 3} {'type': 'loss', 'content': 0.00019350463117007166, 'timestamp': '2025-09-10 02:32:43.666160', 'step': 6656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:43.704337', 'step': 6656, 'epoch': 3} {'type': 'loss', 'content': 0.00039134820690378547, 'timestamp': '2025-09-10 02:32:43.706236', 'step': 6657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:43.744883', 'step': 6657, 'epoch': 3} {'type': 'loss', 'content': 0.0003388922195881605, 'timestamp': '2025-09-10 02:32:43.746963', 'step': 6658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.782195', 'step': 6658, 'epoch': 3} {'type': 'loss', 'content': 0.0007815192802809179, 'timestamp': '2025-09-10 02:32:43.784291', 'step': 6659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.819507', 'step': 6659, 'epoch': 3} {'type': 'loss', 'content': 0.0001469071430619806, 'timestamp': '2025-09-10 02:32:43.843176', 'step': 6660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.873085', 'step': 6660, 'epoch': 3} {'type': 'loss', 'content': 0.00031934047001414, 'timestamp': '2025-09-10 02:32:43.875043', 'step': 6661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:43.904140', 'step': 6661, 'epoch': 3} {'type': 'loss', 'content': 0.0002618095313664526, 'timestamp': '2025-09-10 02:32:43.906259', 'step': 6662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:43.936075', 'step': 6662, 'epoch': 3} {'type': 'loss', 'content': 0.0004075879987794906, 'timestamp': '2025-09-10 02:32:43.938038', 'step': 6663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:43.967028', 'step': 6663, 'epoch': 3} {'type': 'loss', 'content': 0.00012331274047028273, 'timestamp': '2025-09-10 02:32:43.990803', 'step': 6664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.020118', 'step': 6664, 'epoch': 3} {'type': 'loss', 'content': 0.0001246247411472723, 'timestamp': '2025-09-10 02:32:44.022686', 'step': 6665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.052044', 'step': 6665, 'epoch': 3} {'type': 'loss', 'content': 0.008284551091492176, 'timestamp': '2025-09-10 02:32:44.053949', 'step': 6666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.082876', 'step': 6666, 'epoch': 3} {'type': 'loss', 'content': 0.0006869097123853862, 'timestamp': '2025-09-10 02:32:44.084776', 'step': 6667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.113528', 'step': 6667, 'epoch': 3} {'type': 'loss', 'content': 0.021662766113877296, 'timestamp': '2025-09-10 02:32:44.137101', 'step': 6668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.166537', 'step': 6668, 'epoch': 3} {'type': 'loss', 'content': 0.0006537624867632985, 'timestamp': '2025-09-10 02:32:44.168617', 'step': 6669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.197870', 'step': 6669, 'epoch': 3} {'type': 'loss', 'content': 0.00017999236297328025, 'timestamp': '2025-09-10 02:32:44.200010', 'step': 6670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:44.228986', 'step': 6670, 'epoch': 3} {'type': 'loss', 'content': 8.588766650063917e-05, 'timestamp': '2025-09-10 02:32:44.231128', 'step': 6671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.261322', 'step': 6671, 'epoch': 3} {'type': 'loss', 'content': 0.0011683589546009898, 'timestamp': '2025-09-10 02:32:44.284904', 'step': 6672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.314054', 'step': 6672, 'epoch': 3} {'type': 'loss', 'content': 0.0007778553408570588, 'timestamp': '2025-09-10 02:32:44.316823', 'step': 6673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.347732', 'step': 6673, 'epoch': 3} {'type': 'loss', 'content': 0.0004378134326543659, 'timestamp': '2025-09-10 02:32:44.349887', 'step': 6674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.380148', 'step': 6674, 'epoch': 3} {'type': 'loss', 'content': 0.010837412439286709, 'timestamp': '2025-09-10 02:32:44.382113', 'step': 6675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:44.411187', 'step': 6675, 'epoch': 3} {'type': 'loss', 'content': 0.0008242077310569584, 'timestamp': '2025-09-10 02:32:44.434681', 'step': 6676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:44.472534', 'step': 6676, 'epoch': 3} {'type': 'loss', 'content': 0.004866982344537973, 'timestamp': '2025-09-10 02:32:44.474381', 'step': 6677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.504050', 'step': 6677, 'epoch': 3} {'type': 'loss', 'content': 0.00041917446651495993, 'timestamp': '2025-09-10 02:32:44.506377', 'step': 6678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.535753', 'step': 6678, 'epoch': 3} {'type': 'loss', 'content': 0.00038965611020103097, 'timestamp': '2025-09-10 02:32:44.537907', 'step': 6679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.566724', 'step': 6679, 'epoch': 3} {'type': 'loss', 'content': 0.017665987834334373, 'timestamp': '2025-09-10 02:32:44.590396', 'step': 6680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.619671', 'step': 6680, 'epoch': 3} {'type': 'loss', 'content': 0.0001284900208702311, 'timestamp': '2025-09-10 02:32:44.621786', 'step': 6681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.650665', 'step': 6681, 'epoch': 3} {'type': 'loss', 'content': 0.00014236057177186012, 'timestamp': '2025-09-10 02:32:44.652852', 'step': 6682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.682151', 'step': 6682, 'epoch': 3} {'type': 'loss', 'content': 0.00045861300895921886, 'timestamp': '2025-09-10 02:32:44.684166', 'step': 6683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.713031', 'step': 6683, 'epoch': 3} {'type': 'loss', 'content': 0.00014278704475145787, 'timestamp': '2025-09-10 02:32:44.736796', 'step': 6684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.767318', 'step': 6684, 'epoch': 3} {'type': 'loss', 'content': 0.00020633505482692271, 'timestamp': '2025-09-10 02:32:44.769420', 'step': 6685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.801789', 'step': 6685, 'epoch': 3} {'type': 'loss', 'content': 0.00016045241500250995, 'timestamp': '2025-09-10 02:32:44.803672', 'step': 6686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.832409', 'step': 6686, 'epoch': 3} {'type': 'loss', 'content': 0.0002670391113497317, 'timestamp': '2025-09-10 02:32:44.834991', 'step': 6687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:44.863688', 'step': 6687, 'epoch': 3} {'type': 'loss', 'content': 0.000776700850110501, 'timestamp': '2025-09-10 02:32:44.887236', 'step': 6688, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:32:46.773454', 'step': 6688, 'epoch': 3} {'type': 'pplx', 'content': 2634816.097719576, 'timestamp': '2025-09-10 02:32:46.776015', 'step': 6688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:46.808653', 'step': 6688, 'epoch': 3} {'type': 'loss', 'content': 0.0011966631282120943, 'timestamp': '2025-09-10 02:32:46.810605', 'step': 6689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:46.839851', 'step': 6689, 'epoch': 3} {'type': 'loss', 'content': 0.001301180454902351, 'timestamp': '2025-09-10 02:32:46.841931', 'step': 6690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:46.870836', 'step': 6690, 'epoch': 3} {'type': 'loss', 'content': 0.00025518672191537917, 'timestamp': '2025-09-10 02:32:46.872574', 'step': 6691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:46.901876', 'step': 6691, 'epoch': 3} {'type': 'loss', 'content': 0.00013390577805694193, 'timestamp': '2025-09-10 02:32:46.926006', 'step': 6692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:46.955461', 'step': 6692, 'epoch': 3} {'type': 'loss', 'content': 0.002019841456785798, 'timestamp': '2025-09-10 02:32:46.957657', 'step': 6693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:46.986484', 'step': 6693, 'epoch': 3} {'type': 'loss', 'content': 0.000430583517299965, 'timestamp': '2025-09-10 02:32:46.988472', 'step': 6694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.018352', 'step': 6694, 'epoch': 3} {'type': 'loss', 'content': 0.00022048897517379373, 'timestamp': '2025-09-10 02:32:47.020143', 'step': 6695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:47.049252', 'step': 6695, 'epoch': 3} {'type': 'loss', 'content': 0.009212544187903404, 'timestamp': '2025-09-10 02:32:47.072661', 'step': 6696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:47.101348', 'step': 6696, 'epoch': 3} {'type': 'loss', 'content': 0.0011059354292228818, 'timestamp': '2025-09-10 02:32:47.103412', 'step': 6697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.132522', 'step': 6697, 'epoch': 3} {'type': 'loss', 'content': 0.0013964826939627528, 'timestamp': '2025-09-10 02:32:47.134236', 'step': 6698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.163221', 'step': 6698, 'epoch': 3} {'type': 'loss', 'content': 0.002260487526655197, 'timestamp': '2025-09-10 02:32:47.165263', 'step': 6699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.194402', 'step': 6699, 'epoch': 3} {'type': 'loss', 'content': 0.0012373748468235135, 'timestamp': '2025-09-10 02:32:47.217740', 'step': 6700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.251024', 'step': 6700, 'epoch': 3} {'type': 'loss', 'content': 0.00034490125835873187, 'timestamp': '2025-09-10 02:32:47.253141', 'step': 6701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.283566', 'step': 6701, 'epoch': 3} {'type': 'loss', 'content': 0.00042592009413056076, 'timestamp': '2025-09-10 02:32:47.285340', 'step': 6702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.315568', 'step': 6702, 'epoch': 3} {'type': 'loss', 'content': 7.375200948445126e-05, 'timestamp': '2025-09-10 02:32:47.317531', 'step': 6703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.351570', 'step': 6703, 'epoch': 3} {'type': 'loss', 'content': 0.0007520223734900355, 'timestamp': '2025-09-10 02:32:47.374923', 'step': 6704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.409795', 'step': 6704, 'epoch': 3} {'type': 'loss', 'content': 0.006365684326738119, 'timestamp': '2025-09-10 02:32:47.411539', 'step': 6705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.443914', 'step': 6705, 'epoch': 3} {'type': 'loss', 'content': 0.014979584142565727, 'timestamp': '2025-09-10 02:32:47.446028', 'step': 6706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.478362', 'step': 6706, 'epoch': 3} {'type': 'loss', 'content': 0.0001600277901161462, 'timestamp': '2025-09-10 02:32:47.480782', 'step': 6707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.513289', 'step': 6707, 'epoch': 3} {'type': 'loss', 'content': 0.0006272908649407327, 'timestamp': '2025-09-10 02:32:47.536742', 'step': 6708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.569708', 'step': 6708, 'epoch': 3} {'type': 'loss', 'content': 0.002627496374770999, 'timestamp': '2025-09-10 02:32:47.571837', 'step': 6709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.605615', 'step': 6709, 'epoch': 3} {'type': 'loss', 'content': 6.385224696714431e-05, 'timestamp': '2025-09-10 02:32:47.607902', 'step': 6710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.642766', 'step': 6710, 'epoch': 3} {'type': 'loss', 'content': 0.0007850642432458699, 'timestamp': '2025-09-10 02:32:47.644590', 'step': 6711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:47.681506', 'step': 6711, 'epoch': 3} {'type': 'loss', 'content': 9.543353371554986e-05, 'timestamp': '2025-09-10 02:32:47.704790', 'step': 6712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.742055', 'step': 6712, 'epoch': 3} {'type': 'loss', 'content': 0.00014451451716013253, 'timestamp': '2025-09-10 02:32:47.745163', 'step': 6713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:47.779304', 'step': 6713, 'epoch': 3} {'type': 'loss', 'content': 0.007798864506185055, 'timestamp': '2025-09-10 02:32:47.781069', 'step': 6714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:47.817635', 'step': 6714, 'epoch': 3} {'type': 'loss', 'content': 0.0004212721833027899, 'timestamp': '2025-09-10 02:32:47.819793', 'step': 6715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:47.848957', 'step': 6715, 'epoch': 3} {'type': 'loss', 'content': 0.0014056451618671417, 'timestamp': '2025-09-10 02:32:47.872700', 'step': 6716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.901858', 'step': 6716, 'epoch': 3} {'type': 'loss', 'content': 0.00029630190692842007, 'timestamp': '2025-09-10 02:32:47.904792', 'step': 6717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.935506', 'step': 6717, 'epoch': 3} {'type': 'loss', 'content': 0.00025604027905501425, 'timestamp': '2025-09-10 02:32:47.937638', 'step': 6718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.966390', 'step': 6718, 'epoch': 3} {'type': 'loss', 'content': 6.539438618347049e-05, 'timestamp': '2025-09-10 02:32:47.968206', 'step': 6719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:47.997045', 'step': 6719, 'epoch': 3} {'type': 'loss', 'content': 0.00022632161562796682, 'timestamp': '2025-09-10 02:32:48.020360', 'step': 6720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.049654', 'step': 6720, 'epoch': 3} {'type': 'loss', 'content': 0.0006399400299414992, 'timestamp': '2025-09-10 02:32:48.051540', 'step': 6721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:48.080073', 'step': 6721, 'epoch': 3} {'type': 'loss', 'content': 0.00018006419122684747, 'timestamp': '2025-09-10 02:32:48.081967', 'step': 6722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.110591', 'step': 6722, 'epoch': 3} {'type': 'loss', 'content': 0.00041571405017748475, 'timestamp': '2025-09-10 02:32:48.112251', 'step': 6723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.141058', 'step': 6723, 'epoch': 3} {'type': 'loss', 'content': 0.02812386117875576, 'timestamp': '2025-09-10 02:32:48.164605', 'step': 6724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.194003', 'step': 6724, 'epoch': 3} {'type': 'loss', 'content': 0.0004604542045854032, 'timestamp': '2025-09-10 02:32:48.195892', 'step': 6725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.224642', 'step': 6725, 'epoch': 3} {'type': 'loss', 'content': 0.0003610174753703177, 'timestamp': '2025-09-10 02:32:48.226406', 'step': 6726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.255245', 'step': 6726, 'epoch': 3} {'type': 'loss', 'content': 0.0011343618389219046, 'timestamp': '2025-09-10 02:32:48.256988', 'step': 6727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.285554', 'step': 6727, 'epoch': 3} {'type': 'loss', 'content': 7.726944022579119e-05, 'timestamp': '2025-09-10 02:32:48.308801', 'step': 6728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.337271', 'step': 6728, 'epoch': 3} {'type': 'loss', 'content': 0.0002777362533379346, 'timestamp': '2025-09-10 02:32:48.339053', 'step': 6729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.368360', 'step': 6729, 'epoch': 3} {'type': 'loss', 'content': 8.940829138737172e-05, 'timestamp': '2025-09-10 02:32:48.370496', 'step': 6730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.400395', 'step': 6730, 'epoch': 3} {'type': 'loss', 'content': 0.0007924687815830112, 'timestamp': '2025-09-10 02:32:48.402189', 'step': 6731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.430994', 'step': 6731, 'epoch': 3} {'type': 'loss', 'content': 0.0001112060053856112, 'timestamp': '2025-09-10 02:32:48.455663', 'step': 6732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.484326', 'step': 6732, 'epoch': 3} {'type': 'loss', 'content': 0.007602198980748653, 'timestamp': '2025-09-10 02:32:48.485950', 'step': 6733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.514409', 'step': 6733, 'epoch': 3} {'type': 'loss', 'content': 0.00442105857655406, 'timestamp': '2025-09-10 02:32:48.516385', 'step': 6734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.545639', 'step': 6734, 'epoch': 3} {'type': 'loss', 'content': 0.00020948487508576363, 'timestamp': '2025-09-10 02:32:48.547564', 'step': 6735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.576704', 'step': 6735, 'epoch': 3} {'type': 'loss', 'content': 0.00018643557268660516, 'timestamp': '2025-09-10 02:32:48.599729', 'step': 6736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.629362', 'step': 6736, 'epoch': 3} {'type': 'loss', 'content': 0.0019744031596928835, 'timestamp': '2025-09-10 02:32:48.631682', 'step': 6737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.660652', 'step': 6737, 'epoch': 3} {'type': 'loss', 'content': 0.0001240462443092838, 'timestamp': '2025-09-10 02:32:48.662601', 'step': 6738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.692886', 'step': 6738, 'epoch': 3} {'type': 'loss', 'content': 5.261231126496568e-05, 'timestamp': '2025-09-10 02:32:48.694527', 'step': 6739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.723214', 'step': 6739, 'epoch': 3} {'type': 'loss', 'content': 0.00011474742495920509, 'timestamp': '2025-09-10 02:32:48.746600', 'step': 6740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.777605', 'step': 6740, 'epoch': 3} {'type': 'loss', 'content': 5.554107701755129e-05, 'timestamp': '2025-09-10 02:32:48.779672', 'step': 6741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.814010', 'step': 6741, 'epoch': 3} {'type': 'loss', 'content': 0.00044761571916751564, 'timestamp': '2025-09-10 02:32:48.815753', 'step': 6742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.844806', 'step': 6742, 'epoch': 3} {'type': 'loss', 'content': 9.237445192411542e-05, 'timestamp': '2025-09-10 02:32:48.846498', 'step': 6743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:48.875127', 'step': 6743, 'epoch': 3} {'type': 'loss', 'content': 7.44791905162856e-05, 'timestamp': '2025-09-10 02:32:48.898776', 'step': 6744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:48.928586', 'step': 6744, 'epoch': 3} {'type': 'loss', 'content': 0.0011504106223583221, 'timestamp': '2025-09-10 02:32:48.930547', 'step': 6745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:48.959523', 'step': 6745, 'epoch': 3} {'type': 'loss', 'content': 0.0004857448220718652, 'timestamp': '2025-09-10 02:32:48.961239', 'step': 6746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:48.989967', 'step': 6746, 'epoch': 3} {'type': 'loss', 'content': 0.00015927580534480512, 'timestamp': '2025-09-10 02:32:48.991709', 'step': 6747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.021478', 'step': 6747, 'epoch': 3} {'type': 'loss', 'content': 0.0001082676462829113, 'timestamp': '2025-09-10 02:32:49.044731', 'step': 6748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.073802', 'step': 6748, 'epoch': 3} {'type': 'loss', 'content': 0.00010946422844426706, 'timestamp': '2025-09-10 02:32:49.075497', 'step': 6749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.103969', 'step': 6749, 'epoch': 3} {'type': 'loss', 'content': 0.0020970015320926905, 'timestamp': '2025-09-10 02:32:49.105757', 'step': 6750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:49.134879', 'step': 6750, 'epoch': 3} {'type': 'loss', 'content': 0.0008264260250143707, 'timestamp': '2025-09-10 02:32:49.136488', 'step': 6751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.165267', 'step': 6751, 'epoch': 3} {'type': 'loss', 'content': 0.00023407131084240973, 'timestamp': '2025-09-10 02:32:49.188596', 'step': 6752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.218145', 'step': 6752, 'epoch': 3} {'type': 'loss', 'content': 0.0007170811877585948, 'timestamp': '2025-09-10 02:32:49.219827', 'step': 6753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.253911', 'step': 6753, 'epoch': 3} {'type': 'loss', 'content': 0.00010154004121432081, 'timestamp': '2025-09-10 02:32:49.255980', 'step': 6754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.285659', 'step': 6754, 'epoch': 3} {'type': 'loss', 'content': 0.00010521234798943624, 'timestamp': '2025-09-10 02:32:49.287228', 'step': 6755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.317031', 'step': 6755, 'epoch': 3} {'type': 'loss', 'content': 6.685069820377976e-05, 'timestamp': '2025-09-10 02:32:49.340383', 'step': 6756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.370267', 'step': 6756, 'epoch': 3} {'type': 'loss', 'content': 0.0010209261672571301, 'timestamp': '2025-09-10 02:32:49.373652', 'step': 6757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.409611', 'step': 6757, 'epoch': 3} {'type': 'loss', 'content': 0.0022340952418744564, 'timestamp': '2025-09-10 02:32:49.411537', 'step': 6758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.444188', 'step': 6758, 'epoch': 3} {'type': 'loss', 'content': 0.00013346783816814423, 'timestamp': '2025-09-10 02:32:49.446253', 'step': 6759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:49.478474', 'step': 6759, 'epoch': 3} {'type': 'loss', 'content': 0.0002406542480457574, 'timestamp': '2025-09-10 02:32:49.502618', 'step': 6760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.534866', 'step': 6760, 'epoch': 3} {'type': 'loss', 'content': 6.465790647780523e-05, 'timestamp': '2025-09-10 02:32:49.536769', 'step': 6761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.568879', 'step': 6761, 'epoch': 3} {'type': 'loss', 'content': 0.0006072352989576757, 'timestamp': '2025-09-10 02:32:49.570545', 'step': 6762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:49.605203', 'step': 6762, 'epoch': 3} {'type': 'loss', 'content': 5.9133617469342425e-05, 'timestamp': '2025-09-10 02:32:49.607480', 'step': 6763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.641949', 'step': 6763, 'epoch': 3} {'type': 'loss', 'content': 0.0001183756030513905, 'timestamp': '2025-09-10 02:32:49.665475', 'step': 6764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.703370', 'step': 6764, 'epoch': 3} {'type': 'loss', 'content': 3.824655505013652e-05, 'timestamp': '2025-09-10 02:32:49.705081', 'step': 6765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.744919', 'step': 6765, 'epoch': 3} {'type': 'loss', 'content': 0.0017311256378889084, 'timestamp': '2025-09-10 02:32:49.746903', 'step': 6766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.782135', 'step': 6766, 'epoch': 3} {'type': 'loss', 'content': 7.348317740252241e-05, 'timestamp': '2025-09-10 02:32:49.783845', 'step': 6767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.820374', 'step': 6767, 'epoch': 3} {'type': 'loss', 'content': 0.009175836108624935, 'timestamp': '2025-09-10 02:32:49.844029', 'step': 6768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.873488', 'step': 6768, 'epoch': 3} {'type': 'loss', 'content': 0.0003108904347755015, 'timestamp': '2025-09-10 02:32:49.875733', 'step': 6769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.908957', 'step': 6769, 'epoch': 3} {'type': 'loss', 'content': 0.0001275867980439216, 'timestamp': '2025-09-10 02:32:49.910739', 'step': 6770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:49.939557', 'step': 6770, 'epoch': 3} {'type': 'loss', 'content': 0.0001226141321239993, 'timestamp': '2025-09-10 02:32:49.941820', 'step': 6771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:49.970800', 'step': 6771, 'epoch': 3} {'type': 'loss', 'content': 0.00014986835594754666, 'timestamp': '2025-09-10 02:32:49.994859', 'step': 6772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.023459', 'step': 6772, 'epoch': 3} {'type': 'loss', 'content': 0.0015205388190224767, 'timestamp': '2025-09-10 02:32:50.025295', 'step': 6773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.054534', 'step': 6773, 'epoch': 3} {'type': 'loss', 'content': 0.00012730220623780042, 'timestamp': '2025-09-10 02:32:50.056541', 'step': 6774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.085130', 'step': 6774, 'epoch': 3} {'type': 'loss', 'content': 5.468766175908968e-05, 'timestamp': '2025-09-10 02:32:50.087955', 'step': 6775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:50.117227', 'step': 6775, 'epoch': 3} {'type': 'loss', 'content': 0.00021866762835998088, 'timestamp': '2025-09-10 02:32:50.141465', 'step': 6776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.171225', 'step': 6776, 'epoch': 3} {'type': 'loss', 'content': 7.189018651843071e-05, 'timestamp': '2025-09-10 02:32:50.173282', 'step': 6777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:50.202948', 'step': 6777, 'epoch': 3} {'type': 'loss', 'content': 0.00012346556468401104, 'timestamp': '2025-09-10 02:32:50.205449', 'step': 6778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.235533', 'step': 6778, 'epoch': 3} {'type': 'loss', 'content': 0.005750373005867004, 'timestamp': '2025-09-10 02:32:50.237940', 'step': 6779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.267609', 'step': 6779, 'epoch': 3} {'type': 'loss', 'content': 0.0001830299006542191, 'timestamp': '2025-09-10 02:32:50.291515', 'step': 6780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:50.321349', 'step': 6780, 'epoch': 3} {'type': 'loss', 'content': 0.00012977873848285526, 'timestamp': '2025-09-10 02:32:50.323390', 'step': 6781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.352546', 'step': 6781, 'epoch': 3} {'type': 'loss', 'content': 0.0009929948719218373, 'timestamp': '2025-09-10 02:32:50.355368', 'step': 6782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.384729', 'step': 6782, 'epoch': 3} {'type': 'loss', 'content': 0.057896532118320465, 'timestamp': '2025-09-10 02:32:50.386951', 'step': 6783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.416111', 'step': 6783, 'epoch': 3} {'type': 'loss', 'content': 8.889515447663143e-05, 'timestamp': '2025-09-10 02:32:50.439572', 'step': 6784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.468594', 'step': 6784, 'epoch': 3} {'type': 'loss', 'content': 0.0056881383061409, 'timestamp': '2025-09-10 02:32:50.470667', 'step': 6785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.499527', 'step': 6785, 'epoch': 3} {'type': 'loss', 'content': 0.0003143739595543593, 'timestamp': '2025-09-10 02:32:50.501432', 'step': 6786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.531118', 'step': 6786, 'epoch': 3} {'type': 'loss', 'content': 0.0001031060965033248, 'timestamp': '2025-09-10 02:32:50.533137', 'step': 6787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.562618', 'step': 6787, 'epoch': 3} {'type': 'loss', 'content': 0.007194445002824068, 'timestamp': '2025-09-10 02:32:50.586210', 'step': 6788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.615536', 'step': 6788, 'epoch': 3} {'type': 'loss', 'content': 9.384167788084596e-05, 'timestamp': '2025-09-10 02:32:50.617392', 'step': 6789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:50.646680', 'step': 6789, 'epoch': 3} {'type': 'loss', 'content': 5.198954386287369e-05, 'timestamp': '2025-09-10 02:32:50.648891', 'step': 6790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.678812', 'step': 6790, 'epoch': 3} {'type': 'loss', 'content': 0.00022589776199311018, 'timestamp': '2025-09-10 02:32:50.680809', 'step': 6791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.709765', 'step': 6791, 'epoch': 3} {'type': 'loss', 'content': 9.448708442505449e-05, 'timestamp': '2025-09-10 02:32:50.733374', 'step': 6792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.762839', 'step': 6792, 'epoch': 3} {'type': 'loss', 'content': 0.00011150127829751, 'timestamp': '2025-09-10 02:32:50.764783', 'step': 6793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:50.796887', 'step': 6793, 'epoch': 3} {'type': 'loss', 'content': 0.0001500972721260041, 'timestamp': '2025-09-10 02:32:50.798827', 'step': 6794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.828818', 'step': 6794, 'epoch': 3} {'type': 'loss', 'content': 5.231571412878111e-05, 'timestamp': '2025-09-10 02:32:50.831029', 'step': 6795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.860183', 'step': 6795, 'epoch': 3} {'type': 'loss', 'content': 0.0001222932041855529, 'timestamp': '2025-09-10 02:32:50.884457', 'step': 6796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.916887', 'step': 6796, 'epoch': 3} {'type': 'loss', 'content': 7.24709389032796e-05, 'timestamp': '2025-09-10 02:32:50.919035', 'step': 6797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.949587', 'step': 6797, 'epoch': 3} {'type': 'loss', 'content': 0.00015622669889125973, 'timestamp': '2025-09-10 02:32:50.951594', 'step': 6798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:50.980638', 'step': 6798, 'epoch': 3} {'type': 'loss', 'content': 0.00018378280219621956, 'timestamp': '2025-09-10 02:32:50.982808', 'step': 6799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.012392', 'step': 6799, 'epoch': 3} {'type': 'loss', 'content': 0.0005293177091516554, 'timestamp': '2025-09-10 02:32:51.035914', 'step': 6800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:51.065087', 'step': 6800, 'epoch': 3} {'type': 'loss', 'content': 0.004148623440414667, 'timestamp': '2025-09-10 02:32:51.067138', 'step': 6801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.095993', 'step': 6801, 'epoch': 3} {'type': 'loss', 'content': 0.02354315109550953, 'timestamp': '2025-09-10 02:32:51.098079', 'step': 6802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.126948', 'step': 6802, 'epoch': 3} {'type': 'loss', 'content': 0.00010290476348018274, 'timestamp': '2025-09-10 02:32:51.129211', 'step': 6803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.158673', 'step': 6803, 'epoch': 3} {'type': 'loss', 'content': 3.785776789300144e-05, 'timestamp': '2025-09-10 02:32:51.182185', 'step': 6804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.211353', 'step': 6804, 'epoch': 3} {'type': 'loss', 'content': 3.411351644899696e-05, 'timestamp': '2025-09-10 02:32:51.213562', 'step': 6805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.247457', 'step': 6805, 'epoch': 3} {'type': 'loss', 'content': 0.000211441089049913, 'timestamp': '2025-09-10 02:32:51.249652', 'step': 6806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.281336', 'step': 6806, 'epoch': 3} {'type': 'loss', 'content': 0.0008072297205217183, 'timestamp': '2025-09-10 02:32:51.283381', 'step': 6807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.314149', 'step': 6807, 'epoch': 3} {'type': 'loss', 'content': 0.012630701996386051, 'timestamp': '2025-09-10 02:32:51.337681', 'step': 6808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.367604', 'step': 6808, 'epoch': 3} {'type': 'loss', 'content': 0.0003062119649257511, 'timestamp': '2025-09-10 02:32:51.369837', 'step': 6809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.402890', 'step': 6809, 'epoch': 3} {'type': 'loss', 'content': 4.283956513972953e-05, 'timestamp': '2025-09-10 02:32:51.405172', 'step': 6810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.439422', 'step': 6810, 'epoch': 3} {'type': 'loss', 'content': 0.00022908160462975502, 'timestamp': '2025-09-10 02:32:51.441507', 'step': 6811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.474207', 'step': 6811, 'epoch': 3} {'type': 'loss', 'content': 0.0009302693651989102, 'timestamp': '2025-09-10 02:32:51.497803', 'step': 6812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:51.531350', 'step': 6812, 'epoch': 3} {'type': 'loss', 'content': 0.0002662238839548081, 'timestamp': '2025-09-10 02:32:51.533449', 'step': 6813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:51.567134', 'step': 6813, 'epoch': 3} {'type': 'loss', 'content': 0.0683477595448494, 'timestamp': '2025-09-10 02:32:51.569031', 'step': 6814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.603651', 'step': 6814, 'epoch': 3} {'type': 'loss', 'content': 0.0001606199366506189, 'timestamp': '2025-09-10 02:32:51.605947', 'step': 6815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:51.640584', 'step': 6815, 'epoch': 3} {'type': 'loss', 'content': 0.0006426494219340384, 'timestamp': '2025-09-10 02:32:51.664323', 'step': 6816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:51.702821', 'step': 6816, 'epoch': 3} {'type': 'loss', 'content': 7.264408486662433e-05, 'timestamp': '2025-09-10 02:32:51.705071', 'step': 6817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:51.743347', 'step': 6817, 'epoch': 3} {'type': 'loss', 'content': 6.33458184893243e-05, 'timestamp': '2025-09-10 02:32:51.745456', 'step': 6818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.780619', 'step': 6818, 'epoch': 3} {'type': 'loss', 'content': 8.742232603253797e-05, 'timestamp': '2025-09-10 02:32:51.782711', 'step': 6819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.819675', 'step': 6819, 'epoch': 3} {'type': 'loss', 'content': 9.838306868914515e-05, 'timestamp': '2025-09-10 02:32:51.843467', 'step': 6820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:51.872891', 'step': 6820, 'epoch': 3} {'type': 'loss', 'content': 0.0002053830394288525, 'timestamp': '2025-09-10 02:32:51.875024', 'step': 6821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:51.904244', 'step': 6821, 'epoch': 3} {'type': 'loss', 'content': 4.579432788887061e-05, 'timestamp': '2025-09-10 02:32:51.906357', 'step': 6822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:51.935633', 'step': 6822, 'epoch': 3} {'type': 'loss', 'content': 0.00019780623551923782, 'timestamp': '2025-09-10 02:32:51.937825', 'step': 6823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:51.968308', 'step': 6823, 'epoch': 3} {'type': 'loss', 'content': 0.00027067208429798484, 'timestamp': '2025-09-10 02:32:51.992018', 'step': 6824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.026939', 'step': 6824, 'epoch': 3} {'type': 'loss', 'content': 5.8917088608723134e-05, 'timestamp': '2025-09-10 02:32:52.029124', 'step': 6825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.058974', 'step': 6825, 'epoch': 3} {'type': 'loss', 'content': 0.00013341773592401296, 'timestamp': '2025-09-10 02:32:52.061170', 'step': 6826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:52.090966', 'step': 6826, 'epoch': 3} {'type': 'loss', 'content': 0.0039385221898555756, 'timestamp': '2025-09-10 02:32:52.093065', 'step': 6827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.122588', 'step': 6827, 'epoch': 3} {'type': 'loss', 'content': 6.967805529711768e-05, 'timestamp': '2025-09-10 02:32:52.146106', 'step': 6828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:52.176423', 'step': 6828, 'epoch': 3} {'type': 'loss', 'content': 8.656596764922142e-05, 'timestamp': '2025-09-10 02:32:52.178249', 'step': 6829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.207182', 'step': 6829, 'epoch': 3} {'type': 'loss', 'content': 0.0003680096997413784, 'timestamp': '2025-09-10 02:32:52.209075', 'step': 6830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:52.239013', 'step': 6830, 'epoch': 3} {'type': 'loss', 'content': 0.00017694065172690898, 'timestamp': '2025-09-10 02:32:52.241170', 'step': 6831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.270142', 'step': 6831, 'epoch': 3} {'type': 'loss', 'content': 0.00028491156990639865, 'timestamp': '2025-09-10 02:32:52.293973', 'step': 6832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.324242', 'step': 6832, 'epoch': 3} {'type': 'loss', 'content': 0.00012682615488301963, 'timestamp': '2025-09-10 02:32:52.326346', 'step': 6833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.355975', 'step': 6833, 'epoch': 3} {'type': 'loss', 'content': 0.00012047600466758013, 'timestamp': '2025-09-10 02:32:52.357957', 'step': 6834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.386930', 'step': 6834, 'epoch': 3} {'type': 'loss', 'content': 0.00045988403144292533, 'timestamp': '2025-09-10 02:32:52.389019', 'step': 6835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:52.418179', 'step': 6835, 'epoch': 3} {'type': 'loss', 'content': 0.0002953163639176637, 'timestamp': '2025-09-10 02:32:52.441855', 'step': 6836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.471362', 'step': 6836, 'epoch': 3} {'type': 'loss', 'content': 8.624989277450368e-05, 'timestamp': '2025-09-10 02:32:52.473326', 'step': 6837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.502640', 'step': 6837, 'epoch': 3} {'type': 'loss', 'content': 0.0001436011807527393, 'timestamp': '2025-09-10 02:32:52.504843', 'step': 6838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.533797', 'step': 6838, 'epoch': 3} {'type': 'loss', 'content': 0.004798574838787317, 'timestamp': '2025-09-10 02:32:52.535918', 'step': 6839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:52.565034', 'step': 6839, 'epoch': 3} {'type': 'loss', 'content': 0.0016028692480176687, 'timestamp': '2025-09-10 02:32:52.588874', 'step': 6840, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:32:54.471541', 'step': 6840, 'epoch': 3} {'type': 'pplx', 'content': 2506378.380704593, 'timestamp': '2025-09-10 02:32:54.473353', 'step': 6840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.501762', 'step': 6840, 'epoch': 3} {'type': 'loss', 'content': 0.0004131238965783268, 'timestamp': '2025-09-10 02:32:54.503360', 'step': 6841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.533279', 'step': 6841, 'epoch': 3} {'type': 'loss', 'content': 0.002611250150948763, 'timestamp': '2025-09-10 02:32:54.535221', 'step': 6842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.564553', 'step': 6842, 'epoch': 3} {'type': 'loss', 'content': 0.025640210136771202, 'timestamp': '2025-09-10 02:32:54.566365', 'step': 6843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.596063', 'step': 6843, 'epoch': 3} {'type': 'loss', 'content': 0.00010162144462810829, 'timestamp': '2025-09-10 02:32:54.619813', 'step': 6844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.650061', 'step': 6844, 'epoch': 3} {'type': 'loss', 'content': 0.00016020890325307846, 'timestamp': '2025-09-10 02:32:54.651682', 'step': 6845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.680443', 'step': 6845, 'epoch': 3} {'type': 'loss', 'content': 0.0006879153079353273, 'timestamp': '2025-09-10 02:32:54.682265', 'step': 6846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.711305', 'step': 6846, 'epoch': 3} {'type': 'loss', 'content': 0.0020874382462352514, 'timestamp': '2025-09-10 02:32:54.713475', 'step': 6847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.742452', 'step': 6847, 'epoch': 3} {'type': 'loss', 'content': 0.00032064953120425344, 'timestamp': '2025-09-10 02:32:54.765954', 'step': 6848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:54.798185', 'step': 6848, 'epoch': 3} {'type': 'loss', 'content': 7.953395834192634e-05, 'timestamp': '2025-09-10 02:32:54.800120', 'step': 6849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.829865', 'step': 6849, 'epoch': 3} {'type': 'loss', 'content': 0.001108153141103685, 'timestamp': '2025-09-10 02:32:54.831564', 'step': 6850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.860731', 'step': 6850, 'epoch': 3} {'type': 'loss', 'content': 0.00035554394708015025, 'timestamp': '2025-09-10 02:32:54.862631', 'step': 6851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:54.891694', 'step': 6851, 'epoch': 3} {'type': 'loss', 'content': 9.832141950028017e-05, 'timestamp': '2025-09-10 02:32:54.915052', 'step': 6852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:54.944091', 'step': 6852, 'epoch': 3} {'type': 'loss', 'content': 7.806030043866485e-05, 'timestamp': '2025-09-10 02:32:54.945816', 'step': 6853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:54.974879', 'step': 6853, 'epoch': 3} {'type': 'loss', 'content': 0.035263847559690475, 'timestamp': '2025-09-10 02:32:54.976914', 'step': 6854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.006367', 'step': 6854, 'epoch': 3} {'type': 'loss', 'content': 0.006878325249999762, 'timestamp': '2025-09-10 02:32:55.008257', 'step': 6855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.037134', 'step': 6855, 'epoch': 3} {'type': 'loss', 'content': 0.0003121345944236964, 'timestamp': '2025-09-10 02:32:55.060664', 'step': 6856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.090180', 'step': 6856, 'epoch': 3} {'type': 'loss', 'content': 0.0030188935343176126, 'timestamp': '2025-09-10 02:32:55.092033', 'step': 6857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.121012', 'step': 6857, 'epoch': 3} {'type': 'loss', 'content': 0.011274388059973717, 'timestamp': '2025-09-10 02:32:55.122826', 'step': 6858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.151502', 'step': 6858, 'epoch': 3} {'type': 'loss', 'content': 0.00012130722461733967, 'timestamp': '2025-09-10 02:32:55.153221', 'step': 6859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.183550', 'step': 6859, 'epoch': 3} {'type': 'loss', 'content': 9.649340790929273e-05, 'timestamp': '2025-09-10 02:32:55.207061', 'step': 6860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.235719', 'step': 6860, 'epoch': 3} {'type': 'loss', 'content': 0.00036928398185409606, 'timestamp': '2025-09-10 02:32:55.237470', 'step': 6861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.270406', 'step': 6861, 'epoch': 3} {'type': 'loss', 'content': 0.0006278843502514064, 'timestamp': '2025-09-10 02:32:55.272139', 'step': 6862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.304055', 'step': 6862, 'epoch': 3} {'type': 'loss', 'content': 0.0012702380772680044, 'timestamp': '2025-09-10 02:32:55.314841', 'step': 6863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.357629', 'step': 6863, 'epoch': 3} {'type': 'loss', 'content': 0.00018065668700728565, 'timestamp': '2025-09-10 02:32:55.380883', 'step': 6864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.413807', 'step': 6864, 'epoch': 3} {'type': 'loss', 'content': 7.278566772583872e-05, 'timestamp': '2025-09-10 02:32:55.415423', 'step': 6865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.447210', 'step': 6865, 'epoch': 3} {'type': 'loss', 'content': 0.00028578523779287934, 'timestamp': '2025-09-10 02:32:55.448828', 'step': 6866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.481568', 'step': 6866, 'epoch': 3} {'type': 'loss', 'content': 3.5654356906889006e-05, 'timestamp': '2025-09-10 02:32:55.483478', 'step': 6867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.515472', 'step': 6867, 'epoch': 3} {'type': 'loss', 'content': 0.00021527970966417342, 'timestamp': '2025-09-10 02:32:55.540965', 'step': 6868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.575896', 'step': 6868, 'epoch': 3} {'type': 'loss', 'content': 0.0009781593689695, 'timestamp': '2025-09-10 02:32:55.577919', 'step': 6869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:55.613728', 'step': 6869, 'epoch': 3} {'type': 'loss', 'content': 7.875356823205948e-05, 'timestamp': '2025-09-10 02:32:55.615435', 'step': 6870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.647253', 'step': 6870, 'epoch': 3} {'type': 'loss', 'content': 9.778184903552756e-05, 'timestamp': '2025-09-10 02:32:55.649054', 'step': 6871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.692917', 'step': 6871, 'epoch': 3} {'type': 'loss', 'content': 0.0014271338004618883, 'timestamp': '2025-09-10 02:32:55.716419', 'step': 6872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.752675', 'step': 6872, 'epoch': 3} {'type': 'loss', 'content': 9.694429900264367e-05, 'timestamp': '2025-09-10 02:32:55.760824', 'step': 6873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:55.806388', 'step': 6873, 'epoch': 3} {'type': 'loss', 'content': 0.0006887580966576934, 'timestamp': '2025-09-10 02:32:55.808118', 'step': 6874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.836724', 'step': 6874, 'epoch': 3} {'type': 'loss', 'content': 7.986043783603236e-05, 'timestamp': '2025-09-10 02:32:55.838426', 'step': 6875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.868823', 'step': 6875, 'epoch': 3} {'type': 'loss', 'content': 0.0004202057025395334, 'timestamp': '2025-09-10 02:32:55.892226', 'step': 6876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:55.922324', 'step': 6876, 'epoch': 3} {'type': 'loss', 'content': 0.006292398553341627, 'timestamp': '2025-09-10 02:32:55.924247', 'step': 6877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:55.953390', 'step': 6877, 'epoch': 3} {'type': 'loss', 'content': 0.00038204342126846313, 'timestamp': '2025-09-10 02:32:55.961561', 'step': 6878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:56.000325', 'step': 6878, 'epoch': 3} {'type': 'loss', 'content': 0.00032601208658888936, 'timestamp': '2025-09-10 02:32:56.001989', 'step': 6879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.030910', 'step': 6879, 'epoch': 3} {'type': 'loss', 'content': 0.00011965764861088246, 'timestamp': '2025-09-10 02:32:56.054270', 'step': 6880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.097814', 'step': 6880, 'epoch': 3} {'type': 'loss', 'content': 0.00011441412789281458, 'timestamp': '2025-09-10 02:32:56.103750', 'step': 6881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.133193', 'step': 6881, 'epoch': 3} {'type': 'loss', 'content': 0.02021711878478527, 'timestamp': '2025-09-10 02:32:56.134830', 'step': 6882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.168973', 'step': 6882, 'epoch': 3} {'type': 'loss', 'content': 0.0012548385420814157, 'timestamp': '2025-09-10 02:32:56.170980', 'step': 6883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.199905', 'step': 6883, 'epoch': 3} {'type': 'loss', 'content': 0.0006658626953139901, 'timestamp': '2025-09-10 02:32:56.223227', 'step': 6884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.252247', 'step': 6884, 'epoch': 3} {'type': 'loss', 'content': 0.03124235011637211, 'timestamp': '2025-09-10 02:32:56.253886', 'step': 6885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:56.282630', 'step': 6885, 'epoch': 3} {'type': 'loss', 'content': 0.0007810547831468284, 'timestamp': '2025-09-10 02:32:56.284078', 'step': 6886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.312694', 'step': 6886, 'epoch': 3} {'type': 'loss', 'content': 4.096954216947779e-05, 'timestamp': '2025-09-10 02:32:56.314337', 'step': 6887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.342787', 'step': 6887, 'epoch': 3} {'type': 'loss', 'content': 9.618890180718154e-05, 'timestamp': '2025-09-10 02:32:56.366212', 'step': 6888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.395329', 'step': 6888, 'epoch': 3} {'type': 'loss', 'content': 0.00043241720413789153, 'timestamp': '2025-09-10 02:32:56.397012', 'step': 6889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.425907', 'step': 6889, 'epoch': 3} {'type': 'loss', 'content': 0.0001073941239155829, 'timestamp': '2025-09-10 02:32:56.427970', 'step': 6890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.456643', 'step': 6890, 'epoch': 3} {'type': 'loss', 'content': 8.71215743245557e-05, 'timestamp': '2025-09-10 02:32:56.458649', 'step': 6891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.487801', 'step': 6891, 'epoch': 3} {'type': 'loss', 'content': 0.0012700591469183564, 'timestamp': '2025-09-10 02:32:56.511353', 'step': 6892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.540092', 'step': 6892, 'epoch': 3} {'type': 'loss', 'content': 0.09516288340091705, 'timestamp': '2025-09-10 02:32:56.541939', 'step': 6893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.571189', 'step': 6893, 'epoch': 3} {'type': 'loss', 'content': 0.0011443132534623146, 'timestamp': '2025-09-10 02:32:56.573171', 'step': 6894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.602204', 'step': 6894, 'epoch': 3} {'type': 'loss', 'content': 8.956807141657919e-05, 'timestamp': '2025-09-10 02:32:56.604022', 'step': 6895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.633564', 'step': 6895, 'epoch': 3} {'type': 'loss', 'content': 0.001971913268789649, 'timestamp': '2025-09-10 02:32:56.656979', 'step': 6896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.685950', 'step': 6896, 'epoch': 3} {'type': 'loss', 'content': 0.00021504145115613937, 'timestamp': '2025-09-10 02:32:56.687876', 'step': 6897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.716539', 'step': 6897, 'epoch': 3} {'type': 'loss', 'content': 7.672259380342439e-05, 'timestamp': '2025-09-10 02:32:56.718730', 'step': 6898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.747996', 'step': 6898, 'epoch': 3} {'type': 'loss', 'content': 0.00014145906607154757, 'timestamp': '2025-09-10 02:32:56.749894', 'step': 6899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.781479', 'step': 6899, 'epoch': 3} {'type': 'loss', 'content': 8.570096542825922e-05, 'timestamp': '2025-09-10 02:32:56.804855', 'step': 6900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.833623', 'step': 6900, 'epoch': 3} {'type': 'loss', 'content': 0.00012143644562456757, 'timestamp': '2025-09-10 02:32:56.835406', 'step': 6901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.864335', 'step': 6901, 'epoch': 3} {'type': 'loss', 'content': 0.0001100988665712066, 'timestamp': '2025-09-10 02:32:56.866155', 'step': 6902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.895306', 'step': 6902, 'epoch': 3} {'type': 'loss', 'content': 0.008772674016654491, 'timestamp': '2025-09-10 02:32:56.897023', 'step': 6903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:56.926271', 'step': 6903, 'epoch': 3} {'type': 'loss', 'content': 0.00016054752632044256, 'timestamp': '2025-09-10 02:32:56.949762', 'step': 6904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:56.979384', 'step': 6904, 'epoch': 3} {'type': 'loss', 'content': 7.028750405879691e-05, 'timestamp': '2025-09-10 02:32:56.981063', 'step': 6905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.010381', 'step': 6905, 'epoch': 3} {'type': 'loss', 'content': 0.06985701620578766, 'timestamp': '2025-09-10 02:32:57.012016', 'step': 6906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.040903', 'step': 6906, 'epoch': 3} {'type': 'loss', 'content': 0.0037340200506150723, 'timestamp': '2025-09-10 02:32:57.042644', 'step': 6907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.071692', 'step': 6907, 'epoch': 3} {'type': 'loss', 'content': 0.00045398736256174743, 'timestamp': '2025-09-10 02:32:57.095336', 'step': 6908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.125388', 'step': 6908, 'epoch': 3} {'type': 'loss', 'content': 0.000166809419170022, 'timestamp': '2025-09-10 02:32:57.127196', 'step': 6909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:57.156540', 'step': 6909, 'epoch': 3} {'type': 'loss', 'content': 0.00012180476187495515, 'timestamp': '2025-09-10 02:32:57.158484', 'step': 6910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.188258', 'step': 6910, 'epoch': 3} {'type': 'loss', 'content': 0.000284523208392784, 'timestamp': '2025-09-10 02:32:57.189937', 'step': 6911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.218732', 'step': 6911, 'epoch': 3} {'type': 'loss', 'content': 0.00013130329898558557, 'timestamp': '2025-09-10 02:32:57.242470', 'step': 6912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:57.275039', 'step': 6912, 'epoch': 3} {'type': 'loss', 'content': 6.123074126662686e-05, 'timestamp': '2025-09-10 02:32:57.277314', 'step': 6913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.309551', 'step': 6913, 'epoch': 3} {'type': 'loss', 'content': 0.0004217438108753413, 'timestamp': '2025-09-10 02:32:57.311221', 'step': 6914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.342395', 'step': 6914, 'epoch': 3} {'type': 'loss', 'content': 0.026478629559278488, 'timestamp': '2025-09-10 02:32:57.344449', 'step': 6915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:57.380086', 'step': 6915, 'epoch': 3} {'type': 'loss', 'content': 0.00012950468226335943, 'timestamp': '2025-09-10 02:32:57.403492', 'step': 6916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.437685', 'step': 6916, 'epoch': 3} {'type': 'loss', 'content': 0.0018698758212849498, 'timestamp': '2025-09-10 02:32:57.439566', 'step': 6917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.472147', 'step': 6917, 'epoch': 3} {'type': 'loss', 'content': 0.0007063063676469028, 'timestamp': '2025-09-10 02:32:57.473801', 'step': 6918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.514566', 'step': 6918, 'epoch': 3} {'type': 'loss', 'content': 0.0015736583154648542, 'timestamp': '2025-09-10 02:32:57.516352', 'step': 6919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.547008', 'step': 6919, 'epoch': 3} {'type': 'loss', 'content': 0.00035860989009961486, 'timestamp': '2025-09-10 02:32:57.570523', 'step': 6920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.605124', 'step': 6920, 'epoch': 3} {'type': 'loss', 'content': 0.00014832009037490934, 'timestamp': '2025-09-10 02:32:57.609625', 'step': 6921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.645670', 'step': 6921, 'epoch': 3} {'type': 'loss', 'content': 0.0010603677947074175, 'timestamp': '2025-09-10 02:32:57.647678', 'step': 6922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.685549', 'step': 6922, 'epoch': 3} {'type': 'loss', 'content': 0.0002539857814554125, 'timestamp': '2025-09-10 02:32:57.687415', 'step': 6923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.729694', 'step': 6923, 'epoch': 3} {'type': 'loss', 'content': 0.00019526577671058476, 'timestamp': '2025-09-10 02:32:57.753269', 'step': 6924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:57.788113', 'step': 6924, 'epoch': 3} {'type': 'loss', 'content': 0.003988584503531456, 'timestamp': '2025-09-10 02:32:57.790469', 'step': 6925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:57.824316', 'step': 6925, 'epoch': 3} {'type': 'loss', 'content': 0.008360235020518303, 'timestamp': '2025-09-10 02:32:57.826514', 'step': 6926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:57.855788', 'step': 6926, 'epoch': 3} {'type': 'loss', 'content': 0.00013281036808621138, 'timestamp': '2025-09-10 02:32:57.857906', 'step': 6927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.887022', 'step': 6927, 'epoch': 3} {'type': 'loss', 'content': 7.877969619585201e-05, 'timestamp': '2025-09-10 02:32:57.910446', 'step': 6928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:57.939321', 'step': 6928, 'epoch': 3} {'type': 'loss', 'content': 0.0002676034055184573, 'timestamp': '2025-09-10 02:32:57.941482', 'step': 6929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:57.970366', 'step': 6929, 'epoch': 3} {'type': 'loss', 'content': 0.0010093670571222901, 'timestamp': '2025-09-10 02:32:57.972341', 'step': 6930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.002321', 'step': 6930, 'epoch': 3} {'type': 'loss', 'content': 0.0003377363027539104, 'timestamp': '2025-09-10 02:32:58.004457', 'step': 6931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.037541', 'step': 6931, 'epoch': 3} {'type': 'loss', 'content': 0.00020868994761258364, 'timestamp': '2025-09-10 02:32:58.060973', 'step': 6932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:58.091768', 'step': 6932, 'epoch': 3} {'type': 'loss', 'content': 0.00015123530465643853, 'timestamp': '2025-09-10 02:32:58.093725', 'step': 6933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.122595', 'step': 6933, 'epoch': 3} {'type': 'loss', 'content': 0.0010079393396154046, 'timestamp': '2025-09-10 02:32:58.124504', 'step': 6934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.153629', 'step': 6934, 'epoch': 3} {'type': 'loss', 'content': 0.001646541990339756, 'timestamp': '2025-09-10 02:32:58.155399', 'step': 6935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.184634', 'step': 6935, 'epoch': 3} {'type': 'loss', 'content': 0.014478239230811596, 'timestamp': '2025-09-10 02:32:58.207931', 'step': 6936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.237471', 'step': 6936, 'epoch': 3} {'type': 'loss', 'content': 0.0014481694670394063, 'timestamp': '2025-09-10 02:32:58.239187', 'step': 6937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.267818', 'step': 6937, 'epoch': 3} {'type': 'loss', 'content': 0.018990149721503258, 'timestamp': '2025-09-10 02:32:58.269765', 'step': 6938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.298365', 'step': 6938, 'epoch': 3} {'type': 'loss', 'content': 0.0002534259401727468, 'timestamp': '2025-09-10 02:32:58.300099', 'step': 6939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.328733', 'step': 6939, 'epoch': 3} {'type': 'loss', 'content': 0.0007595556089654565, 'timestamp': '2025-09-10 02:32:58.352099', 'step': 6940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.385807', 'step': 6940, 'epoch': 3} {'type': 'loss', 'content': 0.0003793592914007604, 'timestamp': '2025-09-10 02:32:58.387736', 'step': 6941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:32:58.416932', 'step': 6941, 'epoch': 3} {'type': 'loss', 'content': 0.00811021588742733, 'timestamp': '2025-09-10 02:32:58.418960', 'step': 6942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.447342', 'step': 6942, 'epoch': 3} {'type': 'loss', 'content': 0.00023596796381752938, 'timestamp': '2025-09-10 02:32:58.449167', 'step': 6943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.477692', 'step': 6943, 'epoch': 3} {'type': 'loss', 'content': 0.00923880934715271, 'timestamp': '2025-09-10 02:32:58.501069', 'step': 6944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.529658', 'step': 6944, 'epoch': 3} {'type': 'loss', 'content': 0.0009434501407667994, 'timestamp': '2025-09-10 02:32:58.531349', 'step': 6945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.560292', 'step': 6945, 'epoch': 3} {'type': 'loss', 'content': 0.0006749120657332242, 'timestamp': '2025-09-10 02:32:58.562118', 'step': 6946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.591017', 'step': 6946, 'epoch': 3} {'type': 'loss', 'content': 0.00024472290533594787, 'timestamp': '2025-09-10 02:32:58.592771', 'step': 6947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.621205', 'step': 6947, 'epoch': 3} {'type': 'loss', 'content': 0.007182929199188948, 'timestamp': '2025-09-10 02:32:58.644496', 'step': 6948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.673876', 'step': 6948, 'epoch': 3} {'type': 'loss', 'content': 0.001059431699104607, 'timestamp': '2025-09-10 02:32:58.675611', 'step': 6949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.704406', 'step': 6949, 'epoch': 3} {'type': 'loss', 'content': 0.0002584144822321832, 'timestamp': '2025-09-10 02:32:58.706115', 'step': 6950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.734683', 'step': 6950, 'epoch': 3} {'type': 'loss', 'content': 0.00016741511353757232, 'timestamp': '2025-09-10 02:32:58.736562', 'step': 6951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:58.766692', 'step': 6951, 'epoch': 3} {'type': 'loss', 'content': 0.01323452778160572, 'timestamp': '2025-09-10 02:32:58.790091', 'step': 6952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.823155', 'step': 6952, 'epoch': 3} {'type': 'loss', 'content': 0.0021357552614063025, 'timestamp': '2025-09-10 02:32:58.824819', 'step': 6953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.853576', 'step': 6953, 'epoch': 3} {'type': 'loss', 'content': 0.00036122865276411176, 'timestamp': '2025-09-10 02:32:58.855378', 'step': 6954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.884348', 'step': 6954, 'epoch': 3} {'type': 'loss', 'content': 0.001152566634118557, 'timestamp': '2025-09-10 02:32:58.886174', 'step': 6955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.915118', 'step': 6955, 'epoch': 3} {'type': 'loss', 'content': 0.0010581667302176356, 'timestamp': '2025-09-10 02:32:58.938593', 'step': 6956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:58.968249', 'step': 6956, 'epoch': 3} {'type': 'loss', 'content': 0.0008137312834151089, 'timestamp': '2025-09-10 02:32:58.970113', 'step': 6957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:58.999489', 'step': 6957, 'epoch': 3} {'type': 'loss', 'content': 0.00020094976935070008, 'timestamp': '2025-09-10 02:32:59.001321', 'step': 6958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.030461', 'step': 6958, 'epoch': 3} {'type': 'loss', 'content': 0.00020380767819005996, 'timestamp': '2025-09-10 02:32:59.032290', 'step': 6959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.060995', 'step': 6959, 'epoch': 3} {'type': 'loss', 'content': 0.0004710287321358919, 'timestamp': '2025-09-10 02:32:59.084163', 'step': 6960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:32:59.112888', 'step': 6960, 'epoch': 3} {'type': 'loss', 'content': 0.0007825446664355695, 'timestamp': '2025-09-10 02:32:59.114617', 'step': 6961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.143254', 'step': 6961, 'epoch': 3} {'type': 'loss', 'content': 0.002040523337200284, 'timestamp': '2025-09-10 02:32:59.144956', 'step': 6962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.174342', 'step': 6962, 'epoch': 3} {'type': 'loss', 'content': 0.004353491123765707, 'timestamp': '2025-09-10 02:32:59.176354', 'step': 6963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.204976', 'step': 6963, 'epoch': 3} {'type': 'loss', 'content': 0.0005209581577219069, 'timestamp': '2025-09-10 02:32:59.228125', 'step': 6964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.259906', 'step': 6964, 'epoch': 3} {'type': 'loss', 'content': 0.0013556017074733973, 'timestamp': '2025-09-10 02:32:59.261794', 'step': 6965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.291234', 'step': 6965, 'epoch': 3} {'type': 'loss', 'content': 0.00023635872639715672, 'timestamp': '2025-09-10 02:32:59.292948', 'step': 6966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.321455', 'step': 6966, 'epoch': 3} {'type': 'loss', 'content': 0.0002624272892717272, 'timestamp': '2025-09-10 02:32:59.323256', 'step': 6967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.354900', 'step': 6967, 'epoch': 3} {'type': 'loss', 'content': 0.0016566140111535788, 'timestamp': '2025-09-10 02:32:59.378256', 'step': 6968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.411602', 'step': 6968, 'epoch': 3} {'type': 'loss', 'content': 0.00015238292689900845, 'timestamp': '2025-09-10 02:32:59.413392', 'step': 6969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.445483', 'step': 6969, 'epoch': 3} {'type': 'loss', 'content': 0.028976481407880783, 'timestamp': '2025-09-10 02:32:59.447268', 'step': 6970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.479694', 'step': 6970, 'epoch': 3} {'type': 'loss', 'content': 0.0008458493393845856, 'timestamp': '2025-09-10 02:32:59.481457', 'step': 6971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.513976', 'step': 6971, 'epoch': 3} {'type': 'loss', 'content': 0.00027471824432723224, 'timestamp': '2025-09-10 02:32:59.537198', 'step': 6972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.570258', 'step': 6972, 'epoch': 3} {'type': 'loss', 'content': 0.003696908475831151, 'timestamp': '2025-09-10 02:32:59.572137', 'step': 6973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.604747', 'step': 6973, 'epoch': 3} {'type': 'loss', 'content': 0.00034436071291565895, 'timestamp': '2025-09-10 02:32:59.606590', 'step': 6974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.636562', 'step': 6974, 'epoch': 3} {'type': 'loss', 'content': 0.025353388860821724, 'timestamp': '2025-09-10 02:32:59.638402', 'step': 6975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.671670', 'step': 6975, 'epoch': 3} {'type': 'loss', 'content': 0.00040680429083295166, 'timestamp': '2025-09-10 02:32:59.694915', 'step': 6976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.728038', 'step': 6976, 'epoch': 3} {'type': 'loss', 'content': 0.0006491728127002716, 'timestamp': '2025-09-10 02:32:59.729800', 'step': 6977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.768954', 'step': 6977, 'epoch': 3} {'type': 'loss', 'content': 0.0016482959035784006, 'timestamp': '2025-09-10 02:32:59.770922', 'step': 6978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.807359', 'step': 6978, 'epoch': 3} {'type': 'loss', 'content': 0.00020466528076212853, 'timestamp': '2025-09-10 02:32:59.809113', 'step': 6979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:32:59.837863', 'step': 6979, 'epoch': 3} {'type': 'loss', 'content': 0.005594300571829081, 'timestamp': '2025-09-10 02:32:59.860979', 'step': 6980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.890241', 'step': 6980, 'epoch': 3} {'type': 'loss', 'content': 0.0012212840374559164, 'timestamp': '2025-09-10 02:32:59.892133', 'step': 6981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.921147', 'step': 6981, 'epoch': 3} {'type': 'loss', 'content': 0.0004984021070413291, 'timestamp': '2025-09-10 02:32:59.922835', 'step': 6982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.951706', 'step': 6982, 'epoch': 3} {'type': 'loss', 'content': 0.0018222949001938105, 'timestamp': '2025-09-10 02:32:59.953875', 'step': 6983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:32:59.982874', 'step': 6983, 'epoch': 3} {'type': 'loss', 'content': 0.000587698828894645, 'timestamp': '2025-09-10 02:33:00.006423', 'step': 6984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:00.036102', 'step': 6984, 'epoch': 3} {'type': 'loss', 'content': 0.0006888590869493783, 'timestamp': '2025-09-10 02:33:00.038102', 'step': 6985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.066948', 'step': 6985, 'epoch': 3} {'type': 'loss', 'content': 0.001037910464219749, 'timestamp': '2025-09-10 02:33:00.068866', 'step': 6986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.097635', 'step': 6986, 'epoch': 3} {'type': 'loss', 'content': 0.0006376398378051817, 'timestamp': '2025-09-10 02:33:00.099373', 'step': 6987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.128090', 'step': 6987, 'epoch': 3} {'type': 'loss', 'content': 0.0011590939247980714, 'timestamp': '2025-09-10 02:33:00.151318', 'step': 6988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.180167', 'step': 6988, 'epoch': 3} {'type': 'loss', 'content': 0.00022759917192161083, 'timestamp': '2025-09-10 02:33:00.181824', 'step': 6989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:00.210315', 'step': 6989, 'epoch': 3} {'type': 'loss', 'content': 0.0004471018328331411, 'timestamp': '2025-09-10 02:33:00.212205', 'step': 6990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.241366', 'step': 6990, 'epoch': 3} {'type': 'loss', 'content': 0.0002062628191197291, 'timestamp': '2025-09-10 02:33:00.243152', 'step': 6991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:00.271988', 'step': 6991, 'epoch': 3} {'type': 'loss', 'content': 0.007703538052737713, 'timestamp': '2025-09-10 02:33:00.295388', 'step': 6992, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:33:02.197257', 'step': 6992, 'epoch': 3} {'type': 'pplx', 'content': 2788629.99139465, 'timestamp': '2025-09-10 02:33:02.198869', 'step': 6992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:02.226827', 'step': 6992, 'epoch': 3} {'type': 'loss', 'content': 0.0019857888109982014, 'timestamp': '2025-09-10 02:33:02.228421', 'step': 6993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:02.257719', 'step': 6993, 'epoch': 3} {'type': 'loss', 'content': 0.00016344898904208094, 'timestamp': '2025-09-10 02:33:02.259518', 'step': 6994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:02.288228', 'step': 6994, 'epoch': 3} {'type': 'loss', 'content': 0.0008041561814025044, 'timestamp': '2025-09-10 02:33:02.289747', 'step': 6995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:02.318588', 'step': 6995, 'epoch': 3} {'type': 'loss', 'content': 0.005006737541407347, 'timestamp': '2025-09-10 02:33:02.341876', 'step': 6996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:02.370884', 'step': 6996, 'epoch': 3} {'type': 'loss', 'content': 0.002978698117658496, 'timestamp': '2025-09-10 02:33:02.372871', 'step': 6997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:02.402050', 'step': 6997, 'epoch': 3} {'type': 'loss', 'content': 0.0004123369581066072, 'timestamp': '2025-09-10 02:33:02.403953', 'step': 6998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:02.433024', 'step': 6998, 'epoch': 3} {'type': 'loss', 'content': 0.0001701653382042423, 'timestamp': '2025-09-10 02:33:02.434885', 'step': 6999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:02.463977', 'step': 6999, 'epoch': 3} {'type': 'loss', 'content': 0.0028805527836084366, 'timestamp': '2025-09-10 02:33:02.487486', 'step': 7000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-10 02:33:07.082404', 'step': 7000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:07.120951', 'step': 7000, 'epoch': 3} {'type': 'loss', 'content': 0.0010814116103574634, 'timestamp': '2025-09-10 02:33:07.122798', 'step': 7001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.152114', 'step': 7001, 'epoch': 3} {'type': 'loss', 'content': 0.00018377190281171352, 'timestamp': '2025-09-10 02:33:07.153897', 'step': 7002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.183405', 'step': 7002, 'epoch': 3} {'type': 'loss', 'content': 0.00031134625896811485, 'timestamp': '2025-09-10 02:33:07.185103', 'step': 7003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:07.215771', 'step': 7003, 'epoch': 3} {'type': 'loss', 'content': 0.0001239635021192953, 'timestamp': '2025-09-10 02:33:07.239382', 'step': 7004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.271809', 'step': 7004, 'epoch': 3} {'type': 'loss', 'content': 0.006299679167568684, 'timestamp': '2025-09-10 02:33:07.273756', 'step': 7005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:07.306787', 'step': 7005, 'epoch': 3} {'type': 'loss', 'content': 0.0036267682444304228, 'timestamp': '2025-09-10 02:33:07.309180', 'step': 7006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.340179', 'step': 7006, 'epoch': 3} {'type': 'loss', 'content': 0.0032613552175462246, 'timestamp': '2025-09-10 02:33:07.341999', 'step': 7007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:07.371625', 'step': 7007, 'epoch': 3} {'type': 'loss', 'content': 0.0004091924347449094, 'timestamp': '2025-09-10 02:33:07.395328', 'step': 7008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.429746', 'step': 7008, 'epoch': 3} {'type': 'loss', 'content': 0.00021148764062672853, 'timestamp': '2025-09-10 02:33:07.431780', 'step': 7009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.462429', 'step': 7009, 'epoch': 3} {'type': 'loss', 'content': 0.00019017969316337258, 'timestamp': '2025-09-10 02:33:07.464148', 'step': 7010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.501175', 'step': 7010, 'epoch': 3} {'type': 'loss', 'content': 9.069267252925783e-05, 'timestamp': '2025-09-10 02:33:07.503016', 'step': 7011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.536144', 'step': 7011, 'epoch': 3} {'type': 'loss', 'content': 0.00021314578771125525, 'timestamp': '2025-09-10 02:33:07.559260', 'step': 7012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.593723', 'step': 7012, 'epoch': 3} {'type': 'loss', 'content': 0.00013223057612776756, 'timestamp': '2025-09-10 02:33:07.595786', 'step': 7013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.626792', 'step': 7013, 'epoch': 3} {'type': 'loss', 'content': 0.001440140069462359, 'timestamp': '2025-09-10 02:33:07.628721', 'step': 7014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:07.662020', 'step': 7014, 'epoch': 3} {'type': 'loss', 'content': 0.00016096878971438855, 'timestamp': '2025-09-10 02:33:07.663853', 'step': 7015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.700876', 'step': 7015, 'epoch': 3} {'type': 'loss', 'content': 0.003439195454120636, 'timestamp': '2025-09-10 02:33:07.723998', 'step': 7016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.763270', 'step': 7016, 'epoch': 3} {'type': 'loss', 'content': 0.00029929561424069107, 'timestamp': '2025-09-10 02:33:07.765048', 'step': 7017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.801808', 'step': 7017, 'epoch': 3} {'type': 'loss', 'content': 0.0007030866108834743, 'timestamp': '2025-09-10 02:33:07.804001', 'step': 7018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.832842', 'step': 7018, 'epoch': 3} {'type': 'loss', 'content': 0.00105840596370399, 'timestamp': '2025-09-10 02:33:07.834626', 'step': 7019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.863503', 'step': 7019, 'epoch': 3} {'type': 'loss', 'content': 0.00040280394023284316, 'timestamp': '2025-09-10 02:33:07.886653', 'step': 7020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.916026', 'step': 7020, 'epoch': 3} {'type': 'loss', 'content': 0.005881170276552439, 'timestamp': '2025-09-10 02:33:07.918026', 'step': 7021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:07.946997', 'step': 7021, 'epoch': 3} {'type': 'loss', 'content': 0.01138144638389349, 'timestamp': '2025-09-10 02:33:07.948827', 'step': 7022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:07.977492', 'step': 7022, 'epoch': 3} {'type': 'loss', 'content': 0.0017692273249849677, 'timestamp': '2025-09-10 02:33:07.979426', 'step': 7023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.009160', 'step': 7023, 'epoch': 3} {'type': 'loss', 'content': 0.00033304395037703216, 'timestamp': '2025-09-10 02:33:08.032143', 'step': 7024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.063251', 'step': 7024, 'epoch': 3} {'type': 'loss', 'content': 0.0007116887718439102, 'timestamp': '2025-09-10 02:33:08.065338', 'step': 7025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.094177', 'step': 7025, 'epoch': 3} {'type': 'loss', 'content': 0.00013543645036406815, 'timestamp': '2025-09-10 02:33:08.095933', 'step': 7026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.124474', 'step': 7026, 'epoch': 3} {'type': 'loss', 'content': 0.00411624601110816, 'timestamp': '2025-09-10 02:33:08.126108', 'step': 7027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.154812', 'step': 7027, 'epoch': 3} {'type': 'loss', 'content': 0.0014488935703411698, 'timestamp': '2025-09-10 02:33:08.178088', 'step': 7028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.206856', 'step': 7028, 'epoch': 3} {'type': 'loss', 'content': 0.0002482616691850126, 'timestamp': '2025-09-10 02:33:08.208596', 'step': 7029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.237202', 'step': 7029, 'epoch': 3} {'type': 'loss', 'content': 0.0005101663991808891, 'timestamp': '2025-09-10 02:33:08.239059', 'step': 7030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.267644', 'step': 7030, 'epoch': 3} {'type': 'loss', 'content': 9.328716259915382e-05, 'timestamp': '2025-09-10 02:33:08.269807', 'step': 7031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.298612', 'step': 7031, 'epoch': 3} {'type': 'loss', 'content': 0.00046753903734497726, 'timestamp': '2025-09-10 02:33:08.321581', 'step': 7032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.350133', 'step': 7032, 'epoch': 3} {'type': 'loss', 'content': 0.0002330717834411189, 'timestamp': '2025-09-10 02:33:08.351866', 'step': 7033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.380531', 'step': 7033, 'epoch': 3} {'type': 'loss', 'content': 0.0003099815803579986, 'timestamp': '2025-09-10 02:33:08.382339', 'step': 7034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.411941', 'step': 7034, 'epoch': 3} {'type': 'loss', 'content': 0.0008793718297965825, 'timestamp': '2025-09-10 02:33:08.413760', 'step': 7035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.442491', 'step': 7035, 'epoch': 3} {'type': 'loss', 'content': 9.09976297407411e-05, 'timestamp': '2025-09-10 02:33:08.466333', 'step': 7036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.495269', 'step': 7036, 'epoch': 3} {'type': 'loss', 'content': 0.0005195220583118498, 'timestamp': '2025-09-10 02:33:08.497078', 'step': 7037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.527802', 'step': 7037, 'epoch': 3} {'type': 'loss', 'content': 0.0562581941485405, 'timestamp': '2025-09-10 02:33:08.529672', 'step': 7038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.558419', 'step': 7038, 'epoch': 3} {'type': 'loss', 'content': 0.0029173591174185276, 'timestamp': '2025-09-10 02:33:08.560092', 'step': 7039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.588440', 'step': 7039, 'epoch': 3} {'type': 'loss', 'content': 0.00016022950876504183, 'timestamp': '2025-09-10 02:33:08.611833', 'step': 7040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.640821', 'step': 7040, 'epoch': 3} {'type': 'loss', 'content': 0.0002844734990503639, 'timestamp': '2025-09-10 02:33:08.642779', 'step': 7041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.671202', 'step': 7041, 'epoch': 3} {'type': 'loss', 'content': 0.005056020338088274, 'timestamp': '2025-09-10 02:33:08.672861', 'step': 7042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.701540', 'step': 7042, 'epoch': 3} {'type': 'loss', 'content': 8.879298547981307e-05, 'timestamp': '2025-09-10 02:33:08.703404', 'step': 7043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.732832', 'step': 7043, 'epoch': 3} {'type': 'loss', 'content': 0.00011597503180382773, 'timestamp': '2025-09-10 02:33:08.756015', 'step': 7044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.787776', 'step': 7044, 'epoch': 3} {'type': 'loss', 'content': 0.0006039845175109804, 'timestamp': '2025-09-10 02:33:08.789459', 'step': 7045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:08.822620', 'step': 7045, 'epoch': 3} {'type': 'loss', 'content': 0.0007330150110647082, 'timestamp': '2025-09-10 02:33:08.824583', 'step': 7046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.854166', 'step': 7046, 'epoch': 3} {'type': 'loss', 'content': 0.0001059885835275054, 'timestamp': '2025-09-10 02:33:08.855644', 'step': 7047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.884369', 'step': 7047, 'epoch': 3} {'type': 'loss', 'content': 0.006155488546937704, 'timestamp': '2025-09-10 02:33:08.907802', 'step': 7048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:08.936886', 'step': 7048, 'epoch': 3} {'type': 'loss', 'content': 0.00010457994358148426, 'timestamp': '2025-09-10 02:33:08.938901', 'step': 7049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.967971', 'step': 7049, 'epoch': 3} {'type': 'loss', 'content': 0.00033087312476709485, 'timestamp': '2025-09-10 02:33:08.970100', 'step': 7050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:08.999016', 'step': 7050, 'epoch': 3} {'type': 'loss', 'content': 0.00039945446769706905, 'timestamp': '2025-09-10 02:33:09.000899', 'step': 7051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.029868', 'step': 7051, 'epoch': 3} {'type': 'loss', 'content': 8.988494664663449e-05, 'timestamp': '2025-09-10 02:33:09.053710', 'step': 7052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.082648', 'step': 7052, 'epoch': 3} {'type': 'loss', 'content': 0.00014014882617630064, 'timestamp': '2025-09-10 02:33:09.084651', 'step': 7053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.113809', 'step': 7053, 'epoch': 3} {'type': 'loss', 'content': 0.04872704669833183, 'timestamp': '2025-09-10 02:33:09.115560', 'step': 7054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:09.145171', 'step': 7054, 'epoch': 3} {'type': 'loss', 'content': 0.00013906206004321575, 'timestamp': '2025-09-10 02:33:09.147007', 'step': 7055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.177313', 'step': 7055, 'epoch': 3} {'type': 'loss', 'content': 0.01986103318631649, 'timestamp': '2025-09-10 02:33:09.200518', 'step': 7056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.229809', 'step': 7056, 'epoch': 3} {'type': 'loss', 'content': 8.047802111832425e-05, 'timestamp': '2025-09-10 02:33:09.231626', 'step': 7057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.260482', 'step': 7057, 'epoch': 3} {'type': 'loss', 'content': 0.00015633231669198722, 'timestamp': '2025-09-10 02:33:09.262442', 'step': 7058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:09.291547', 'step': 7058, 'epoch': 3} {'type': 'loss', 'content': 0.00010946964175673202, 'timestamp': '2025-09-10 02:33:09.293625', 'step': 7059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.323136', 'step': 7059, 'epoch': 3} {'type': 'loss', 'content': 0.0009689336875453591, 'timestamp': '2025-09-10 02:33:09.346840', 'step': 7060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.376370', 'step': 7060, 'epoch': 3} {'type': 'loss', 'content': 0.00028793231467716396, 'timestamp': '2025-09-10 02:33:09.378375', 'step': 7061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.407627', 'step': 7061, 'epoch': 3} {'type': 'loss', 'content': 0.00020774765289388597, 'timestamp': '2025-09-10 02:33:09.409616', 'step': 7062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.438833', 'step': 7062, 'epoch': 3} {'type': 'loss', 'content': 0.00014935732178855687, 'timestamp': '2025-09-10 02:33:09.441043', 'step': 7063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.470048', 'step': 7063, 'epoch': 3} {'type': 'loss', 'content': 0.0001656820677453652, 'timestamp': '2025-09-10 02:33:09.493672', 'step': 7064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.523290', 'step': 7064, 'epoch': 3} {'type': 'loss', 'content': 0.0003780599217861891, 'timestamp': '2025-09-10 02:33:09.525548', 'step': 7065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.554806', 'step': 7065, 'epoch': 3} {'type': 'loss', 'content': 0.0020802337676286697, 'timestamp': '2025-09-10 02:33:09.556876', 'step': 7066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.586119', 'step': 7066, 'epoch': 3} {'type': 'loss', 'content': 5.816563134430908e-05, 'timestamp': '2025-09-10 02:33:09.588579', 'step': 7067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:09.617910', 'step': 7067, 'epoch': 3} {'type': 'loss', 'content': 0.0018779776291921735, 'timestamp': '2025-09-10 02:33:09.641306', 'step': 7068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:09.674707', 'step': 7068, 'epoch': 3} {'type': 'loss', 'content': 0.0007602676632814109, 'timestamp': '2025-09-10 02:33:09.676646', 'step': 7069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:09.705895', 'step': 7069, 'epoch': 3} {'type': 'loss', 'content': 0.0005338700721040368, 'timestamp': '2025-09-10 02:33:09.708641', 'step': 7070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.738086', 'step': 7070, 'epoch': 3} {'type': 'loss', 'content': 0.00016905389202293009, 'timestamp': '2025-09-10 02:33:09.740063', 'step': 7071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.768856', 'step': 7071, 'epoch': 3} {'type': 'loss', 'content': 0.00012695681652985513, 'timestamp': '2025-09-10 02:33:09.792327', 'step': 7072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.821947', 'step': 7072, 'epoch': 3} {'type': 'loss', 'content': 0.0005912245833314955, 'timestamp': '2025-09-10 02:33:09.823837', 'step': 7073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.852869', 'step': 7073, 'epoch': 3} {'type': 'loss', 'content': 0.00019086863903794438, 'timestamp': '2025-09-10 02:33:09.855130', 'step': 7074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.884344', 'step': 7074, 'epoch': 3} {'type': 'loss', 'content': 0.00011014730262104422, 'timestamp': '2025-09-10 02:33:09.886405', 'step': 7075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:09.915777', 'step': 7075, 'epoch': 3} {'type': 'loss', 'content': 0.00023562587739434093, 'timestamp': '2025-09-10 02:33:09.939531', 'step': 7076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:09.968972', 'step': 7076, 'epoch': 3} {'type': 'loss', 'content': 0.0007138791843317449, 'timestamp': '2025-09-10 02:33:09.971048', 'step': 7077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.000140', 'step': 7077, 'epoch': 3} {'type': 'loss', 'content': 0.009201214648783207, 'timestamp': '2025-09-10 02:33:10.002037', 'step': 7078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.031061', 'step': 7078, 'epoch': 3} {'type': 'loss', 'content': 0.00014139436825644225, 'timestamp': '2025-09-10 02:33:10.032945', 'step': 7079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:10.061988', 'step': 7079, 'epoch': 3} {'type': 'loss', 'content': 7.881080091465265e-05, 'timestamp': '2025-09-10 02:33:10.085332', 'step': 7080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:10.114309', 'step': 7080, 'epoch': 3} {'type': 'loss', 'content': 0.0001724832400213927, 'timestamp': '2025-09-10 02:33:10.116108', 'step': 7081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.145088', 'step': 7081, 'epoch': 3} {'type': 'loss', 'content': 5.758378028986044e-05, 'timestamp': '2025-09-10 02:33:10.147419', 'step': 7082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.177042', 'step': 7082, 'epoch': 3} {'type': 'loss', 'content': 8.480289397994056e-05, 'timestamp': '2025-09-10 02:33:10.179577', 'step': 7083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.212970', 'step': 7083, 'epoch': 3} {'type': 'loss', 'content': 0.0001304411853197962, 'timestamp': '2025-09-10 02:33:10.236423', 'step': 7084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.275451', 'step': 7084, 'epoch': 3} {'type': 'loss', 'content': 0.00011326195817673579, 'timestamp': '2025-09-10 02:33:10.278168', 'step': 7085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.318027', 'step': 7085, 'epoch': 3} {'type': 'loss', 'content': 0.00013952630979474634, 'timestamp': '2025-09-10 02:33:10.320107', 'step': 7086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.360881', 'step': 7086, 'epoch': 3} {'type': 'loss', 'content': 0.00010285182361258194, 'timestamp': '2025-09-10 02:33:10.363305', 'step': 7087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.404923', 'step': 7087, 'epoch': 3} {'type': 'loss', 'content': 0.0013592649484053254, 'timestamp': '2025-09-10 02:33:10.428124', 'step': 7088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:10.460867', 'step': 7088, 'epoch': 3} {'type': 'loss', 'content': 0.001080493675544858, 'timestamp': '2025-09-10 02:33:10.462765', 'step': 7089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.492077', 'step': 7089, 'epoch': 3} {'type': 'loss', 'content': 0.0003453026874922216, 'timestamp': '2025-09-10 02:33:10.494152', 'step': 7090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:10.523051', 'step': 7090, 'epoch': 3} {'type': 'loss', 'content': 0.00012879457790404558, 'timestamp': '2025-09-10 02:33:10.525023', 'step': 7091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:10.553881', 'step': 7091, 'epoch': 3} {'type': 'loss', 'content': 5.4374802857637405e-05, 'timestamp': '2025-09-10 02:33:10.577498', 'step': 7092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.606928', 'step': 7092, 'epoch': 3} {'type': 'loss', 'content': 0.00010372676479164511, 'timestamp': '2025-09-10 02:33:10.608820', 'step': 7093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.637850', 'step': 7093, 'epoch': 3} {'type': 'loss', 'content': 7.86098898970522e-05, 'timestamp': '2025-09-10 02:33:10.639884', 'step': 7094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.668905', 'step': 7094, 'epoch': 3} {'type': 'loss', 'content': 0.0003556795709300786, 'timestamp': '2025-09-10 02:33:10.671000', 'step': 7095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.700400', 'step': 7095, 'epoch': 3} {'type': 'loss', 'content': 8.15380408312194e-05, 'timestamp': '2025-09-10 02:33:10.723675', 'step': 7096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.753461', 'step': 7096, 'epoch': 3} {'type': 'loss', 'content': 6.984026549616829e-05, 'timestamp': '2025-09-10 02:33:10.755185', 'step': 7097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.786817', 'step': 7097, 'epoch': 3} {'type': 'loss', 'content': 0.00014169642236083746, 'timestamp': '2025-09-10 02:33:10.788852', 'step': 7098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.822338', 'step': 7098, 'epoch': 3} {'type': 'loss', 'content': 8.376573532586917e-05, 'timestamp': '2025-09-10 02:33:10.824023', 'step': 7099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.852999', 'step': 7099, 'epoch': 3} {'type': 'loss', 'content': 6.304789712885395e-05, 'timestamp': '2025-09-10 02:33:10.876561', 'step': 7100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.905811', 'step': 7100, 'epoch': 3} {'type': 'loss', 'content': 6.13820884609595e-05, 'timestamp': '2025-09-10 02:33:10.907445', 'step': 7101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.936023', 'step': 7101, 'epoch': 3} {'type': 'loss', 'content': 0.00020228189532645047, 'timestamp': '2025-09-10 02:33:10.937722', 'step': 7102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.966383', 'step': 7102, 'epoch': 3} {'type': 'loss', 'content': 9.113523265114054e-05, 'timestamp': '2025-09-10 02:33:10.968095', 'step': 7103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:10.997019', 'step': 7103, 'epoch': 3} {'type': 'loss', 'content': 0.0001561507669975981, 'timestamp': '2025-09-10 02:33:11.020212', 'step': 7104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.048872', 'step': 7104, 'epoch': 3} {'type': 'loss', 'content': 0.02126910910010338, 'timestamp': '2025-09-10 02:33:11.050647', 'step': 7105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.079591', 'step': 7105, 'epoch': 3} {'type': 'loss', 'content': 8.194655674742535e-05, 'timestamp': '2025-09-10 02:33:11.081327', 'step': 7106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.110037', 'step': 7106, 'epoch': 3} {'type': 'loss', 'content': 0.00017607762129046023, 'timestamp': '2025-09-10 02:33:11.112060', 'step': 7107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.140932', 'step': 7107, 'epoch': 3} {'type': 'loss', 'content': 0.0005335750174708664, 'timestamp': '2025-09-10 02:33:11.164766', 'step': 7108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.194000', 'step': 7108, 'epoch': 3} {'type': 'loss', 'content': 0.0002091860951622948, 'timestamp': '2025-09-10 02:33:11.195819', 'step': 7109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.223958', 'step': 7109, 'epoch': 3} {'type': 'loss', 'content': 6.13267402513884e-05, 'timestamp': '2025-09-10 02:33:11.225777', 'step': 7110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.254286', 'step': 7110, 'epoch': 3} {'type': 'loss', 'content': 9.796098311198875e-05, 'timestamp': '2025-09-10 02:33:11.256209', 'step': 7111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.285231', 'step': 7111, 'epoch': 3} {'type': 'loss', 'content': 8.225706551456824e-05, 'timestamp': '2025-09-10 02:33:11.308549', 'step': 7112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.337941', 'step': 7112, 'epoch': 3} {'type': 'loss', 'content': 0.0002503559517208487, 'timestamp': '2025-09-10 02:33:11.339600', 'step': 7113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.368638', 'step': 7113, 'epoch': 3} {'type': 'loss', 'content': 0.00012760628305841237, 'timestamp': '2025-09-10 02:33:11.370376', 'step': 7114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.399365', 'step': 7114, 'epoch': 3} {'type': 'loss', 'content': 0.030732514336705208, 'timestamp': '2025-09-10 02:33:11.401676', 'step': 7115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.430164', 'step': 7115, 'epoch': 3} {'type': 'loss', 'content': 0.0001161035688710399, 'timestamp': '2025-09-10 02:33:11.453389', 'step': 7116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.482628', 'step': 7116, 'epoch': 3} {'type': 'loss', 'content': 0.00017230946104973555, 'timestamp': '2025-09-10 02:33:11.484106', 'step': 7117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.512805', 'step': 7117, 'epoch': 3} {'type': 'loss', 'content': 0.0063788327388465405, 'timestamp': '2025-09-10 02:33:11.514453', 'step': 7118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.543460', 'step': 7118, 'epoch': 3} {'type': 'loss', 'content': 0.008579443208873272, 'timestamp': '2025-09-10 02:33:11.545130', 'step': 7119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.573738', 'step': 7119, 'epoch': 3} {'type': 'loss', 'content': 9.17142751859501e-05, 'timestamp': '2025-09-10 02:33:11.597047', 'step': 7120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.626131', 'step': 7120, 'epoch': 3} {'type': 'loss', 'content': 0.0007114761392585933, 'timestamp': '2025-09-10 02:33:11.628906', 'step': 7121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:11.658035', 'step': 7121, 'epoch': 3} {'type': 'loss', 'content': 5.198407598072663e-05, 'timestamp': '2025-09-10 02:33:11.659818', 'step': 7122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.688805', 'step': 7122, 'epoch': 3} {'type': 'loss', 'content': 0.00018394214566797018, 'timestamp': '2025-09-10 02:33:11.691009', 'step': 7123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:11.720150', 'step': 7123, 'epoch': 3} {'type': 'loss', 'content': 0.00014498808013740927, 'timestamp': '2025-09-10 02:33:11.743525', 'step': 7124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.773032', 'step': 7124, 'epoch': 3} {'type': 'loss', 'content': 0.002670370042324066, 'timestamp': '2025-09-10 02:33:11.774689', 'step': 7125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.803096', 'step': 7125, 'epoch': 3} {'type': 'loss', 'content': 0.00017485507123637944, 'timestamp': '2025-09-10 02:33:11.804724', 'step': 7126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.833289', 'step': 7126, 'epoch': 3} {'type': 'loss', 'content': 0.01103391032665968, 'timestamp': '2025-09-10 02:33:11.834951', 'step': 7127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:11.863638', 'step': 7127, 'epoch': 3} {'type': 'loss', 'content': 0.0075844330713152885, 'timestamp': '2025-09-10 02:33:11.886946', 'step': 7128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:11.915880', 'step': 7128, 'epoch': 3} {'type': 'loss', 'content': 4.473465378396213e-05, 'timestamp': '2025-09-10 02:33:11.917567', 'step': 7129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:11.945908', 'step': 7129, 'epoch': 3} {'type': 'loss', 'content': 0.018360259011387825, 'timestamp': '2025-09-10 02:33:11.947553', 'step': 7130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:11.976286', 'step': 7130, 'epoch': 3} {'type': 'loss', 'content': 0.00010080776701215655, 'timestamp': '2025-09-10 02:33:11.978285', 'step': 7131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:12.007334', 'step': 7131, 'epoch': 3} {'type': 'loss', 'content': 0.00028588445275090635, 'timestamp': '2025-09-10 02:33:12.031048', 'step': 7132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.060107', 'step': 7132, 'epoch': 3} {'type': 'loss', 'content': 0.00037449359660968184, 'timestamp': '2025-09-10 02:33:12.062265', 'step': 7133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.090891', 'step': 7133, 'epoch': 3} {'type': 'loss', 'content': 6.24270542175509e-05, 'timestamp': '2025-09-10 02:33:12.093127', 'step': 7134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.123322', 'step': 7134, 'epoch': 3} {'type': 'loss', 'content': 0.0013547196285799146, 'timestamp': '2025-09-10 02:33:12.125008', 'step': 7135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:12.153806', 'step': 7135, 'epoch': 3} {'type': 'loss', 'content': 0.0015526112401857972, 'timestamp': '2025-09-10 02:33:12.178427', 'step': 7136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:12.213235', 'step': 7136, 'epoch': 3} {'type': 'loss', 'content': 7.431041012750939e-05, 'timestamp': '2025-09-10 02:33:12.215082', 'step': 7137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.246318', 'step': 7137, 'epoch': 3} {'type': 'loss', 'content': 0.00011348089174134657, 'timestamp': '2025-09-10 02:33:12.248304', 'step': 7138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:12.287892', 'step': 7138, 'epoch': 3} {'type': 'loss', 'content': 0.0002764788514468819, 'timestamp': '2025-09-10 02:33:12.289518', 'step': 7139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.330768', 'step': 7139, 'epoch': 3} {'type': 'loss', 'content': 0.00024690685677342117, 'timestamp': '2025-09-10 02:33:12.353954', 'step': 7140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.393309', 'step': 7140, 'epoch': 3} {'type': 'loss', 'content': 0.0007415753207169473, 'timestamp': '2025-09-10 02:33:12.395041', 'step': 7141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.436214', 'step': 7141, 'epoch': 3} {'type': 'loss', 'content': 0.026568885892629623, 'timestamp': '2025-09-10 02:33:12.437929', 'step': 7142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.470612', 'step': 7142, 'epoch': 3} {'type': 'loss', 'content': 0.00014456224744208157, 'timestamp': '2025-09-10 02:33:12.472229', 'step': 7143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:12.501086', 'step': 7143, 'epoch': 3} {'type': 'loss', 'content': 0.00011599133722484112, 'timestamp': '2025-09-10 02:33:12.524580', 'step': 7144, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:33:14.415894', 'step': 7144, 'epoch': 3} {'type': 'pplx', 'content': 2708584.2478265725, 'timestamp': '2025-09-10 02:33:14.417794', 'step': 7144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.457237', 'step': 7144, 'epoch': 3} {'type': 'loss', 'content': 0.00030244843219406903, 'timestamp': '2025-09-10 02:33:14.459456', 'step': 7145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:14.495339', 'step': 7145, 'epoch': 3} {'type': 'loss', 'content': 0.03959156200289726, 'timestamp': '2025-09-10 02:33:14.497484', 'step': 7146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:14.526610', 'step': 7146, 'epoch': 3} {'type': 'loss', 'content': 0.020285243168473244, 'timestamp': '2025-09-10 02:33:14.528282', 'step': 7147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.557424', 'step': 7147, 'epoch': 3} {'type': 'loss', 'content': 0.00016595340275671333, 'timestamp': '2025-09-10 02:33:14.580963', 'step': 7148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.609648', 'step': 7148, 'epoch': 3} {'type': 'loss', 'content': 0.00016575872723478824, 'timestamp': '2025-09-10 02:33:14.611955', 'step': 7149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.640692', 'step': 7149, 'epoch': 3} {'type': 'loss', 'content': 0.0009058531140908599, 'timestamp': '2025-09-10 02:33:14.642661', 'step': 7150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.671529', 'step': 7150, 'epoch': 3} {'type': 'loss', 'content': 9.503169712843373e-05, 'timestamp': '2025-09-10 02:33:14.673479', 'step': 7151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.702824', 'step': 7151, 'epoch': 3} {'type': 'loss', 'content': 0.00025164708495140076, 'timestamp': '2025-09-10 02:33:14.726531', 'step': 7152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.755504', 'step': 7152, 'epoch': 3} {'type': 'loss', 'content': 0.00029217940755188465, 'timestamp': '2025-09-10 02:33:14.757421', 'step': 7153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.787597', 'step': 7153, 'epoch': 3} {'type': 'loss', 'content': 0.0020050134044140577, 'timestamp': '2025-09-10 02:33:14.789557', 'step': 7154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.826020', 'step': 7154, 'epoch': 3} {'type': 'loss', 'content': 0.00011182740854565054, 'timestamp': '2025-09-10 02:33:14.827768', 'step': 7155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.856359', 'step': 7155, 'epoch': 3} {'type': 'loss', 'content': 0.00018755366909317672, 'timestamp': '2025-09-10 02:33:14.881846', 'step': 7156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.910895', 'step': 7156, 'epoch': 3} {'type': 'loss', 'content': 8.767979306867346e-05, 'timestamp': '2025-09-10 02:33:14.912692', 'step': 7157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.941693', 'step': 7157, 'epoch': 3} {'type': 'loss', 'content': 0.00010503112571313977, 'timestamp': '2025-09-10 02:33:14.943494', 'step': 7158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:14.972214', 'step': 7158, 'epoch': 3} {'type': 'loss', 'content': 0.00032662873854860663, 'timestamp': '2025-09-10 02:33:14.973935', 'step': 7159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.002913', 'step': 7159, 'epoch': 3} {'type': 'loss', 'content': 0.00044180676923133433, 'timestamp': '2025-09-10 02:33:15.026444', 'step': 7160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:15.055900', 'step': 7160, 'epoch': 3} {'type': 'loss', 'content': 0.00045780817163176835, 'timestamp': '2025-09-10 02:33:15.057788', 'step': 7161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:15.087130', 'step': 7161, 'epoch': 3} {'type': 'loss', 'content': 0.05401367321610451, 'timestamp': '2025-09-10 02:33:15.089430', 'step': 7162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.118307', 'step': 7162, 'epoch': 3} {'type': 'loss', 'content': 0.0010221587726846337, 'timestamp': '2025-09-10 02:33:15.120440', 'step': 7163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:15.150325', 'step': 7163, 'epoch': 3} {'type': 'loss', 'content': 0.00013791497622150928, 'timestamp': '2025-09-10 02:33:15.173758', 'step': 7164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.203079', 'step': 7164, 'epoch': 3} {'type': 'loss', 'content': 9.647140541346744e-05, 'timestamp': '2025-09-10 02:33:15.205021', 'step': 7165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.235398', 'step': 7165, 'epoch': 3} {'type': 'loss', 'content': 0.007709108758717775, 'timestamp': '2025-09-10 02:33:15.237422', 'step': 7166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.267408', 'step': 7166, 'epoch': 3} {'type': 'loss', 'content': 0.024306802079081535, 'timestamp': '2025-09-10 02:33:15.269540', 'step': 7167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.299283', 'step': 7167, 'epoch': 3} {'type': 'loss', 'content': 0.00029987801099196076, 'timestamp': '2025-09-10 02:33:15.322949', 'step': 7168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.351698', 'step': 7168, 'epoch': 3} {'type': 'loss', 'content': 0.00010465878585819155, 'timestamp': '2025-09-10 02:33:15.353859', 'step': 7169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.382813', 'step': 7169, 'epoch': 3} {'type': 'loss', 'content': 0.0001675562234595418, 'timestamp': '2025-09-10 02:33:15.384924', 'step': 7170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:15.413753', 'step': 7170, 'epoch': 3} {'type': 'loss', 'content': 0.00011116742098238319, 'timestamp': '2025-09-10 02:33:15.415792', 'step': 7171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.445616', 'step': 7171, 'epoch': 3} {'type': 'loss', 'content': 7.266044121934101e-05, 'timestamp': '2025-09-10 02:33:15.469016', 'step': 7172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.498326', 'step': 7172, 'epoch': 3} {'type': 'loss', 'content': 0.0003432599769439548, 'timestamp': '2025-09-10 02:33:15.499996', 'step': 7173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.528880', 'step': 7173, 'epoch': 3} {'type': 'loss', 'content': 0.0017077813390642405, 'timestamp': '2025-09-10 02:33:15.530784', 'step': 7174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.559697', 'step': 7174, 'epoch': 3} {'type': 'loss', 'content': 0.011404238641262054, 'timestamp': '2025-09-10 02:33:15.561644', 'step': 7175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.590602', 'step': 7175, 'epoch': 3} {'type': 'loss', 'content': 0.0005473392084240913, 'timestamp': '2025-09-10 02:33:15.613750', 'step': 7176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.642703', 'step': 7176, 'epoch': 3} {'type': 'loss', 'content': 0.016036823391914368, 'timestamp': '2025-09-10 02:33:15.644821', 'step': 7177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.673697', 'step': 7177, 'epoch': 3} {'type': 'loss', 'content': 0.00032304725027643144, 'timestamp': '2025-09-10 02:33:15.675865', 'step': 7178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.704670', 'step': 7178, 'epoch': 3} {'type': 'loss', 'content': 0.00021238785120658576, 'timestamp': '2025-09-10 02:33:15.707587', 'step': 7179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.736953', 'step': 7179, 'epoch': 3} {'type': 'loss', 'content': 0.00025740201817825437, 'timestamp': '2025-09-10 02:33:15.760971', 'step': 7180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.790311', 'step': 7180, 'epoch': 3} {'type': 'loss', 'content': 0.0022035904694348574, 'timestamp': '2025-09-10 02:33:15.792545', 'step': 7181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.822862', 'step': 7181, 'epoch': 3} {'type': 'loss', 'content': 0.01289613451808691, 'timestamp': '2025-09-10 02:33:15.825429', 'step': 7182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.854335', 'step': 7182, 'epoch': 3} {'type': 'loss', 'content': 0.00020831004076171666, 'timestamp': '2025-09-10 02:33:15.856443', 'step': 7183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.886240', 'step': 7183, 'epoch': 3} {'type': 'loss', 'content': 0.0003103218332398683, 'timestamp': '2025-09-10 02:33:15.909831', 'step': 7184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:15.939262', 'step': 7184, 'epoch': 3} {'type': 'loss', 'content': 0.008841032162308693, 'timestamp': '2025-09-10 02:33:15.941421', 'step': 7185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:15.970682', 'step': 7185, 'epoch': 3} {'type': 'loss', 'content': 0.000483514042571187, 'timestamp': '2025-09-10 02:33:15.972605', 'step': 7186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.001450', 'step': 7186, 'epoch': 3} {'type': 'loss', 'content': 5.5209828133229166e-05, 'timestamp': '2025-09-10 02:33:16.003340', 'step': 7187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:16.034228', 'step': 7187, 'epoch': 3} {'type': 'loss', 'content': 0.00037289224565029144, 'timestamp': '2025-09-10 02:33:16.057735', 'step': 7188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.087185', 'step': 7188, 'epoch': 3} {'type': 'loss', 'content': 0.0008058823295868933, 'timestamp': '2025-09-10 02:33:16.089214', 'step': 7189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.118354', 'step': 7189, 'epoch': 3} {'type': 'loss', 'content': 5.910907202633098e-05, 'timestamp': '2025-09-10 02:33:16.120523', 'step': 7190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.149531', 'step': 7190, 'epoch': 3} {'type': 'loss', 'content': 0.000479226466268301, 'timestamp': '2025-09-10 02:33:16.151451', 'step': 7191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.180606', 'step': 7191, 'epoch': 3} {'type': 'loss', 'content': 0.00018494235700927675, 'timestamp': '2025-09-10 02:33:16.203847', 'step': 7192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.238089', 'step': 7192, 'epoch': 3} {'type': 'loss', 'content': 0.00022544446983374655, 'timestamp': '2025-09-10 02:33:16.239676', 'step': 7193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.279322', 'step': 7193, 'epoch': 3} {'type': 'loss', 'content': 0.0011998852714896202, 'timestamp': '2025-09-10 02:33:16.281055', 'step': 7194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.322118', 'step': 7194, 'epoch': 3} {'type': 'loss', 'content': 0.0002667410299181938, 'timestamp': '2025-09-10 02:33:16.323774', 'step': 7195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.360392', 'step': 7195, 'epoch': 3} {'type': 'loss', 'content': 0.00021100246522109956, 'timestamp': '2025-09-10 02:33:16.383923', 'step': 7196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.425492', 'step': 7196, 'epoch': 3} {'type': 'loss', 'content': 5.4788488341728225e-05, 'timestamp': '2025-09-10 02:33:16.427338', 'step': 7197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.466318', 'step': 7197, 'epoch': 3} {'type': 'loss', 'content': 0.008930227719247341, 'timestamp': '2025-09-10 02:33:16.468183', 'step': 7198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.505984', 'step': 7198, 'epoch': 3} {'type': 'loss', 'content': 0.000940874801017344, 'timestamp': '2025-09-10 02:33:16.507819', 'step': 7199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.541318', 'step': 7199, 'epoch': 3} {'type': 'loss', 'content': 0.0004326365306042135, 'timestamp': '2025-09-10 02:33:16.564740', 'step': 7200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.593817', 'step': 7200, 'epoch': 3} {'type': 'loss', 'content': 0.0010036567691713572, 'timestamp': '2025-09-10 02:33:16.599972', 'step': 7201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.629347', 'step': 7201, 'epoch': 3} {'type': 'loss', 'content': 0.0004632103373296559, 'timestamp': '2025-09-10 02:33:16.636037', 'step': 7202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.668183', 'step': 7202, 'epoch': 3} {'type': 'loss', 'content': 0.000718653725925833, 'timestamp': '2025-09-10 02:33:16.676007', 'step': 7203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:16.710080', 'step': 7203, 'epoch': 3} {'type': 'loss', 'content': 0.0009695276385173202, 'timestamp': '2025-09-10 02:33:16.733599', 'step': 7204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.774220', 'step': 7204, 'epoch': 3} {'type': 'loss', 'content': 9.65236104093492e-05, 'timestamp': '2025-09-10 02:33:16.779725', 'step': 7205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.812169', 'step': 7205, 'epoch': 3} {'type': 'loss', 'content': 0.00014549396291840822, 'timestamp': '2025-09-10 02:33:16.813981', 'step': 7206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:16.849532', 'step': 7206, 'epoch': 3} {'type': 'loss', 'content': 0.0021547910291701555, 'timestamp': '2025-09-10 02:33:16.851563', 'step': 7207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.880700', 'step': 7207, 'epoch': 3} {'type': 'loss', 'content': 0.000640452781226486, 'timestamp': '2025-09-10 02:33:16.904085', 'step': 7208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.936703', 'step': 7208, 'epoch': 3} {'type': 'loss', 'content': 0.00038810717524029315, 'timestamp': '2025-09-10 02:33:16.939812', 'step': 7209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.968807', 'step': 7209, 'epoch': 3} {'type': 'loss', 'content': 0.0013020496116951108, 'timestamp': '2025-09-10 02:33:16.970418', 'step': 7210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:16.999788', 'step': 7210, 'epoch': 3} {'type': 'loss', 'content': 0.00020852847956120968, 'timestamp': '2025-09-10 02:33:17.001413', 'step': 7211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.030282', 'step': 7211, 'epoch': 3} {'type': 'loss', 'content': 0.0005294812144711614, 'timestamp': '2025-09-10 02:33:17.053668', 'step': 7212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.082684', 'step': 7212, 'epoch': 3} {'type': 'loss', 'content': 0.00015804909344296902, 'timestamp': '2025-09-10 02:33:17.084679', 'step': 7213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.113574', 'step': 7213, 'epoch': 3} {'type': 'loss', 'content': 0.03607497364282608, 'timestamp': '2025-09-10 02:33:17.115833', 'step': 7214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.148880', 'step': 7214, 'epoch': 3} {'type': 'loss', 'content': 0.0005641308380290866, 'timestamp': '2025-09-10 02:33:17.158129', 'step': 7215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.190234', 'step': 7215, 'epoch': 3} {'type': 'loss', 'content': 0.0009306627907790244, 'timestamp': '2025-09-10 02:33:17.213721', 'step': 7216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.245350', 'step': 7216, 'epoch': 3} {'type': 'loss', 'content': 0.000797395478002727, 'timestamp': '2025-09-10 02:33:17.247450', 'step': 7217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.276695', 'step': 7217, 'epoch': 3} {'type': 'loss', 'content': 0.01312278863042593, 'timestamp': '2025-09-10 02:33:17.280277', 'step': 7218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:17.311221', 'step': 7218, 'epoch': 3} {'type': 'loss', 'content': 0.00019354607502464205, 'timestamp': '2025-09-10 02:33:17.313265', 'step': 7219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.342798', 'step': 7219, 'epoch': 3} {'type': 'loss', 'content': 0.00018217017350252718, 'timestamp': '2025-09-10 02:33:17.366426', 'step': 7220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:17.396146', 'step': 7220, 'epoch': 3} {'type': 'loss', 'content': 0.00048028648598119617, 'timestamp': '2025-09-10 02:33:17.397984', 'step': 7221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.426880', 'step': 7221, 'epoch': 3} {'type': 'loss', 'content': 0.0011663679033517838, 'timestamp': '2025-09-10 02:33:17.428664', 'step': 7222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.457813', 'step': 7222, 'epoch': 3} {'type': 'loss', 'content': 0.0005598780116997659, 'timestamp': '2025-09-10 02:33:17.459676', 'step': 7223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.488698', 'step': 7223, 'epoch': 3} {'type': 'loss', 'content': 0.0002811321464832872, 'timestamp': '2025-09-10 02:33:17.512109', 'step': 7224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.541059', 'step': 7224, 'epoch': 3} {'type': 'loss', 'content': 0.00015632924623787403, 'timestamp': '2025-09-10 02:33:17.542947', 'step': 7225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-10 02:33:17.572000', 'step': 7225, 'epoch': 3} {'type': 'loss', 'content': 0.01196103822439909, 'timestamp': '2025-09-10 02:33:17.574200', 'step': 7226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.603650', 'step': 7226, 'epoch': 3} {'type': 'loss', 'content': 5.215074270381592e-05, 'timestamp': '2025-09-10 02:33:17.605374', 'step': 7227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.634926', 'step': 7227, 'epoch': 3} {'type': 'loss', 'content': 0.0003372817882336676, 'timestamp': '2025-09-10 02:33:17.658472', 'step': 7228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.688762', 'step': 7228, 'epoch': 3} {'type': 'loss', 'content': 0.00027380246319808066, 'timestamp': '2025-09-10 02:33:17.690589', 'step': 7229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:17.720040', 'step': 7229, 'epoch': 3} {'type': 'loss', 'content': 0.001981860725209117, 'timestamp': '2025-09-10 02:33:17.721710', 'step': 7230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.750254', 'step': 7230, 'epoch': 3} {'type': 'loss', 'content': 0.004536677151918411, 'timestamp': '2025-09-10 02:33:17.751864', 'step': 7231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.780717', 'step': 7231, 'epoch': 3} {'type': 'loss', 'content': 0.0028503385838121176, 'timestamp': '2025-09-10 02:33:17.804370', 'step': 7232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.833782', 'step': 7232, 'epoch': 3} {'type': 'loss', 'content': 0.00026749540120363235, 'timestamp': '2025-09-10 02:33:17.835843', 'step': 7233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.864780', 'step': 7233, 'epoch': 3} {'type': 'loss', 'content': 0.0003853130911011249, 'timestamp': '2025-09-10 02:33:17.866775', 'step': 7234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.895639', 'step': 7234, 'epoch': 3} {'type': 'loss', 'content': 0.0024504235479980707, 'timestamp': '2025-09-10 02:33:17.897470', 'step': 7235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.926693', 'step': 7235, 'epoch': 3} {'type': 'loss', 'content': 0.000501504517160356, 'timestamp': '2025-09-10 02:33:17.950055', 'step': 7236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:17.978974', 'step': 7236, 'epoch': 3} {'type': 'loss', 'content': 0.002365201013162732, 'timestamp': '2025-09-10 02:33:17.980834', 'step': 7237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.009879', 'step': 7237, 'epoch': 3} {'type': 'loss', 'content': 0.00019209722813684493, 'timestamp': '2025-09-10 02:33:18.012022', 'step': 7238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.040879', 'step': 7238, 'epoch': 3} {'type': 'loss', 'content': 0.0002008116280194372, 'timestamp': '2025-09-10 02:33:18.042859', 'step': 7239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.071853', 'step': 7239, 'epoch': 3} {'type': 'loss', 'content': 0.0011329955887049437, 'timestamp': '2025-09-10 02:33:18.095341', 'step': 7240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.124024', 'step': 7240, 'epoch': 3} {'type': 'loss', 'content': 0.001901619485579431, 'timestamp': '2025-09-10 02:33:18.125882', 'step': 7241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.154340', 'step': 7241, 'epoch': 3} {'type': 'loss', 'content': 0.00016872587730176747, 'timestamp': '2025-09-10 02:33:18.156169', 'step': 7242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.184947', 'step': 7242, 'epoch': 3} {'type': 'loss', 'content': 0.00016339278954546899, 'timestamp': '2025-09-10 02:33:18.186960', 'step': 7243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.223386', 'step': 7243, 'epoch': 3} {'type': 'loss', 'content': 0.0001435885496903211, 'timestamp': '2025-09-10 02:33:18.246774', 'step': 7244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.284002', 'step': 7244, 'epoch': 3} {'type': 'loss', 'content': 0.0005802605883218348, 'timestamp': '2025-09-10 02:33:18.285891', 'step': 7245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.325737', 'step': 7245, 'epoch': 3} {'type': 'loss', 'content': 0.0002150183281628415, 'timestamp': '2025-09-10 02:33:18.327545', 'step': 7246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.363723', 'step': 7246, 'epoch': 3} {'type': 'loss', 'content': 0.0005969787598587573, 'timestamp': '2025-09-10 02:33:18.365507', 'step': 7247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.406517', 'step': 7247, 'epoch': 3} {'type': 'loss', 'content': 0.00018962433387059718, 'timestamp': '2025-09-10 02:33:18.429917', 'step': 7248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:18.467873', 'step': 7248, 'epoch': 3} {'type': 'loss', 'content': 0.0004311216180212796, 'timestamp': '2025-09-10 02:33:18.469742', 'step': 7249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.501204', 'step': 7249, 'epoch': 3} {'type': 'loss', 'content': 0.00030078229610808194, 'timestamp': '2025-09-10 02:33:18.503033', 'step': 7250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.531969', 'step': 7250, 'epoch': 3} {'type': 'loss', 'content': 0.001691844081506133, 'timestamp': '2025-09-10 02:33:18.533836', 'step': 7251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.562872', 'step': 7251, 'epoch': 3} {'type': 'loss', 'content': 0.00035047222627326846, 'timestamp': '2025-09-10 02:33:18.586471', 'step': 7252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.616331', 'step': 7252, 'epoch': 3} {'type': 'loss', 'content': 0.0048087830655276775, 'timestamp': '2025-09-10 02:33:18.618190', 'step': 7253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.647803', 'step': 7253, 'epoch': 3} {'type': 'loss', 'content': 0.0001972487661987543, 'timestamp': '2025-09-10 02:33:18.649643', 'step': 7254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.679033', 'step': 7254, 'epoch': 3} {'type': 'loss', 'content': 0.016567738726735115, 'timestamp': '2025-09-10 02:33:18.680853', 'step': 7255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.709967', 'step': 7255, 'epoch': 3} {'type': 'loss', 'content': 0.0005024011479690671, 'timestamp': '2025-09-10 02:33:18.733329', 'step': 7256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.762432', 'step': 7256, 'epoch': 3} {'type': 'loss', 'content': 0.00013165673590265214, 'timestamp': '2025-09-10 02:33:18.764238', 'step': 7257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.794587', 'step': 7257, 'epoch': 3} {'type': 'loss', 'content': 0.00011971175990765914, 'timestamp': '2025-09-10 02:33:18.796424', 'step': 7258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.832080', 'step': 7258, 'epoch': 3} {'type': 'loss', 'content': 0.00018842818099074066, 'timestamp': '2025-09-10 02:33:18.833896', 'step': 7259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.863340', 'step': 7259, 'epoch': 3} {'type': 'loss', 'content': 0.0001055440297932364, 'timestamp': '2025-09-10 02:33:18.886684', 'step': 7260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.915970', 'step': 7260, 'epoch': 3} {'type': 'loss', 'content': 0.0024492177180945873, 'timestamp': '2025-09-10 02:33:18.917834', 'step': 7261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:18.946317', 'step': 7261, 'epoch': 3} {'type': 'loss', 'content': 0.0010057402541860938, 'timestamp': '2025-09-10 02:33:18.948411', 'step': 7262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:18.977942', 'step': 7262, 'epoch': 3} {'type': 'loss', 'content': 0.000282501510810107, 'timestamp': '2025-09-10 02:33:18.979915', 'step': 7263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.008539', 'step': 7263, 'epoch': 3} {'type': 'loss', 'content': 0.00039048882899805903, 'timestamp': '2025-09-10 02:33:19.032076', 'step': 7264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.061141', 'step': 7264, 'epoch': 3} {'type': 'loss', 'content': 0.00016508702537976205, 'timestamp': '2025-09-10 02:33:19.063093', 'step': 7265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.091554', 'step': 7265, 'epoch': 3} {'type': 'loss', 'content': 0.0063108946196734905, 'timestamp': '2025-09-10 02:33:19.093607', 'step': 7266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.122885', 'step': 7266, 'epoch': 3} {'type': 'loss', 'content': 0.0014061091933399439, 'timestamp': '2025-09-10 02:33:19.124818', 'step': 7267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.153332', 'step': 7267, 'epoch': 3} {'type': 'loss', 'content': 0.00014290912076830864, 'timestamp': '2025-09-10 02:33:19.176671', 'step': 7268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:19.205694', 'step': 7268, 'epoch': 3} {'type': 'loss', 'content': 0.0021674942690879107, 'timestamp': '2025-09-10 02:33:19.207573', 'step': 7269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.236012', 'step': 7269, 'epoch': 3} {'type': 'loss', 'content': 4.766025813296437e-05, 'timestamp': '2025-09-10 02:33:19.237925', 'step': 7270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.266853', 'step': 7270, 'epoch': 3} {'type': 'loss', 'content': 0.00015322669059969485, 'timestamp': '2025-09-10 02:33:19.268882', 'step': 7271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-10 02:33:19.297662', 'step': 7271, 'epoch': 3} {'type': 'loss', 'content': 0.010051853954792023, 'timestamp': '2025-09-10 02:33:19.321165', 'step': 7272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.350138', 'step': 7272, 'epoch': 3} {'type': 'loss', 'content': 0.0027771706227213144, 'timestamp': '2025-09-10 02:33:19.351844', 'step': 7273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.380153', 'step': 7273, 'epoch': 3} {'type': 'loss', 'content': 0.00017319263133686036, 'timestamp': '2025-09-10 02:33:19.381912', 'step': 7274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.410927', 'step': 7274, 'epoch': 3} {'type': 'loss', 'content': 0.008278073742985725, 'timestamp': '2025-09-10 02:33:19.412828', 'step': 7275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.442168', 'step': 7275, 'epoch': 3} {'type': 'loss', 'content': 0.0010432031704112887, 'timestamp': '2025-09-10 02:33:19.465498', 'step': 7276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-10 02:33:19.494858', 'step': 7276, 'epoch': 3} {'type': 'loss', 'content': 0.0011932385386899114, 'timestamp': '2025-09-10 02:33:19.496498', 'step': 7277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.525258', 'step': 7277, 'epoch': 3} {'type': 'loss', 'content': 0.00030725193209946156, 'timestamp': '2025-09-10 02:33:19.526737', 'step': 7278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.555431', 'step': 7278, 'epoch': 3} {'type': 'loss', 'content': 0.00044731449452228844, 'timestamp': '2025-09-10 02:33:19.557418', 'step': 7279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.586302', 'step': 7279, 'epoch': 3} {'type': 'loss', 'content': 0.0009866936597973108, 'timestamp': '2025-09-10 02:33:19.609848', 'step': 7280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.638582', 'step': 7280, 'epoch': 3} {'type': 'loss', 'content': 0.00012599120964296162, 'timestamp': '2025-09-10 02:33:19.640585', 'step': 7281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.669761', 'step': 7281, 'epoch': 3} {'type': 'loss', 'content': 0.000897696299944073, 'timestamp': '2025-09-10 02:33:19.671547', 'step': 7282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.700172', 'step': 7282, 'epoch': 3} {'type': 'loss', 'content': 5.4524887673323974e-05, 'timestamp': '2025-09-10 02:33:19.702176', 'step': 7283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.730697', 'step': 7283, 'epoch': 3} {'type': 'loss', 'content': 6.54960676911287e-05, 'timestamp': '2025-09-10 02:33:19.754038', 'step': 7284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.782691', 'step': 7284, 'epoch': 3} {'type': 'loss', 'content': 7.324916805373505e-05, 'timestamp': '2025-09-10 02:33:19.784657', 'step': 7285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.813236', 'step': 7285, 'epoch': 3} {'type': 'loss', 'content': 0.001871999935247004, 'timestamp': '2025-09-10 02:33:19.815186', 'step': 7286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.844561', 'step': 7286, 'epoch': 3} {'type': 'loss', 'content': 0.0006715627387166023, 'timestamp': '2025-09-10 02:33:19.846524', 'step': 7287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.875929', 'step': 7287, 'epoch': 3} {'type': 'loss', 'content': 0.0017241544555872679, 'timestamp': '2025-09-10 02:33:19.899361', 'step': 7288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.928722', 'step': 7288, 'epoch': 3} {'type': 'loss', 'content': 7.862582424422726e-05, 'timestamp': '2025-09-10 02:33:19.930836', 'step': 7289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.959634', 'step': 7289, 'epoch': 3} {'type': 'loss', 'content': 9.589677210897207e-05, 'timestamp': '2025-09-10 02:33:19.961331', 'step': 7290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:19.990243', 'step': 7290, 'epoch': 3} {'type': 'loss', 'content': 0.000171039835549891, 'timestamp': '2025-09-10 02:33:19.992001', 'step': 7291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.021008', 'step': 7291, 'epoch': 3} {'type': 'loss', 'content': 0.0294545479118824, 'timestamp': '2025-09-10 02:33:20.044219', 'step': 7292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.073064', 'step': 7292, 'epoch': 3} {'type': 'loss', 'content': 0.0008927856688387692, 'timestamp': '2025-09-10 02:33:20.074867', 'step': 7293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.103592', 'step': 7293, 'epoch': 3} {'type': 'loss', 'content': 5.344643795979209e-05, 'timestamp': '2025-09-10 02:33:20.105201', 'step': 7294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.133819', 'step': 7294, 'epoch': 3} {'type': 'loss', 'content': 0.010886363685131073, 'timestamp': '2025-09-10 02:33:20.135483', 'step': 7295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:20.164693', 'step': 7295, 'epoch': 3} {'type': 'loss', 'content': 6.454643153119832e-05, 'timestamp': '2025-09-10 02:33:20.188123', 'step': 7296, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:33:22.036152', 'step': 7296, 'epoch': 3} {'type': 'pplx', 'content': 2553532.4689230984, 'timestamp': '2025-09-10 02:33:22.037804', 'step': 7296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.065870', 'step': 7296, 'epoch': 3} {'type': 'loss', 'content': 0.0003278447256889194, 'timestamp': '2025-09-10 02:33:22.067523', 'step': 7297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.096448', 'step': 7297, 'epoch': 3} {'type': 'loss', 'content': 0.0003597289905883372, 'timestamp': '2025-09-10 02:33:22.098274', 'step': 7298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.126866', 'step': 7298, 'epoch': 3} {'type': 'loss', 'content': 0.00020632542145904154, 'timestamp': '2025-09-10 02:33:22.128565', 'step': 7299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.157032', 'step': 7299, 'epoch': 3} {'type': 'loss', 'content': 0.0006207999540492892, 'timestamp': '2025-09-10 02:33:22.180334', 'step': 7300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.212634', 'step': 7300, 'epoch': 3} {'type': 'loss', 'content': 0.00022598991927225143, 'timestamp': '2025-09-10 02:33:22.214539', 'step': 7301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.247024', 'step': 7301, 'epoch': 3} {'type': 'loss', 'content': 9.620003402233124e-05, 'timestamp': '2025-09-10 02:33:22.248826', 'step': 7302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.287957', 'step': 7302, 'epoch': 3} {'type': 'loss', 'content': 0.0012993158306926489, 'timestamp': '2025-09-10 02:33:22.289807', 'step': 7303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.330945', 'step': 7303, 'epoch': 3} {'type': 'loss', 'content': 0.012171934358775616, 'timestamp': '2025-09-10 02:33:22.355472', 'step': 7304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.393027', 'step': 7304, 'epoch': 3} {'type': 'loss', 'content': 0.00029418981284834445, 'timestamp': '2025-09-10 02:33:22.394907', 'step': 7305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.435429', 'step': 7305, 'epoch': 3} {'type': 'loss', 'content': 0.0052917273715138435, 'timestamp': '2025-09-10 02:33:22.437309', 'step': 7306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-10 02:33:22.474407', 'step': 7306, 'epoch': 3} {'type': 'loss', 'content': 8.426891872659326e-05, 'timestamp': '2025-09-10 02:33:22.476128', 'step': 7307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 80], 'flops': 593517404912}, 'timestamp': '2025-09-10 02:33:22.506700', 'step': 7307, 'epoch': 3} {'type': 'loss', 'content': 1.3720730748900678e-05, 'timestamp': '2025-09-10 02:33:22.530117', 'step': 7308, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [5, 80], 'batch_size': 8, 'flops': 1582003754624}], 'timestamp': '2025-09-10 02:33:24.407419', 'step': 7308, 'epoch': 3} {'type': 'pplx', 'content': 2536462.915071921, 'timestamp': '2025-09-10 02:33:24.409286', 'step': 7308, 'epoch': 3} {'type': 'best_pplx', 'content': 2010640.6963496492, 'timestamp': '2025-09-10 02:33:24.410280', 'step': 7308, 'epoch': 3} {'type': 'best_step', 'content': 5016, 'timestamp': '2025-09-10 02:33:24.411283', 'step': 7308, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 9808423376665600, 'timestamp': '2025-09-10 02:33:24.412339', 'step': 7308, 'epoch': 3} {'type': 'total_train_flops', 'content': 20896705064436048, 'timestamp': '2025-09-10 02:33:24.413721', 'step': 7308, 'epoch': 3}