{ "training_args": { "output_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_commonsense_qa_ff_v1", "overwrite_output_dir": false, "do_train": false, "do_eval": true, "do_predict": false, "eval_strategy": "steps", "prediction_loss_only": false, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 8, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 4, "eval_accumulation_steps": null, "eval_delay": 0, "torch_empty_cache_steps": null, "learning_rate": 2e-05, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3, "max_steps": -1, "lr_scheduler_type": "linear", "lr_scheduler_kwargs": {}, "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, "logging_dir": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_commonsense_qa_ff_v1/runs/Sep10_02-26-03_gx07", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 20, "logging_nan_inf_filter": true, "save_strategy": "epoch", "save_steps": 500, "save_total_limit": null, "save_safetensors": true, "save_on_each_node": false, "save_only_model": false, "restore_callback_states_from_checkpoint": false, "no_cuda": false, "use_cpu": false, "use_mps_device": false, "seed": 42, "data_seed": null, "jit_mode_eval": false, "use_ipex": false, "bf16": false, "fp16": false, "fp16_opt_level": "O1", "half_precision_backend": "auto", "bf16_full_eval": false, "fp16_full_eval": false, "tf32": null, "local_rank": 0, "ddp_backend": null, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": false, "eval_steps": 152, "dataloader_num_workers": 0, "dataloader_prefetch_factor": null, "past_index": -1, "run_name": "/sc/projects/sci-herbrich/chair/lora-bp/valentin.teutschbein/adapters/nlu_commonsense_qa_ff_v1", "disable_tqdm": false, "remove_unused_columns": true, "label_names": null, "load_best_model_at_end": false, "metric_for_best_model": null, "greater_is_better": null, "ignore_data_skip": false, "fsdp": [], "fsdp_min_num_params": 0, "fsdp_config": { "min_num_params": 0, "xla": false, "xla_fsdp_v2": false, "xla_fsdp_grad_ckpt": false }, "fsdp_transformer_layer_cls_to_wrap": null, "accelerator_config": { "split_batches": false, "dispatch_batches": null, "even_batches": true, "use_seedable_sampler": true, "non_blocking": false, "gradient_accumulation_kwargs": null }, "deepspeed": null, "label_smoothing_factor": 0.0, "optim": "adamw_torch", "optim_args": null, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": [], "ddp_find_unused_parameters": null, "ddp_bucket_cap_mb": null, "ddp_broadcast_buffers": null, "dataloader_pin_memory": true, "dataloader_persistent_workers": false, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "hub_model_id": null, "hub_strategy": "every_save", "hub_token": "", "hub_private_repo": null, "hub_always_push": false, "gradient_checkpointing": false, "gradient_checkpointing_kwargs": null, "include_inputs_for_metrics": false, "include_for_metrics": [], "eval_do_concat_batches": true, "fp16_backend": "auto", "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": "", "mp_parameters": "", "auto_find_batch_size": false, "full_determinism": false, "torchdynamo": null, "ray_scope": "last", "ddp_timeout": 1800, "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, "include_tokens_per_second": false, "include_num_input_tokens_seen": false, "neftune_noise_alpha": null, "optim_target_modules": null, "batch_eval_metrics": false, "eval_on_start": false, "use_liger_kernel": false, "eval_use_gather_object": false, "average_tokens_across_devices": false }, "lora_config": null, "flops": { "eval": 9808423376665600, "train": 20896705064436048, "total": 30705128441101648 }, "total_energy": 12.902750000000001, "logs": [ { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:26:13.524298", "step": 0, "epoch": 0 }, { "type": "pplx", "content": 68890406.29865518, "timestamp": "2025-09-10 02:26:13.526235", "step": 0, "epoch": 0 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.592622", "step": 0, "epoch": 1 }, { "type": "loss", "content": 0.784187912940979, "timestamp": "2025-09-10 02:26:13.594173", "step": 1, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.623535", "step": 1, "epoch": 1 }, { "type": "loss", "content": 0.9402614235877991, "timestamp": "2025-09-10 02:26:13.624815", "step": 2, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.652804", "step": 2, "epoch": 1 }, { "type": "loss", "content": 0.9695280194282532, "timestamp": "2025-09-10 02:26:13.654601", "step": 3, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.682671", "step": 3, "epoch": 1 }, { "type": "loss", "content": 0.8488698601722717, "timestamp": "2025-09-10 02:26:13.772469", "step": 4, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.802136", "step": 4, "epoch": 1 }, { "type": "loss", "content": 0.0741942897439003, "timestamp": "2025-09-10 02:26:13.803521", "step": 5, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.832478", "step": 5, "epoch": 1 }, { "type": "loss", "content": 0.08400722593069077, "timestamp": "2025-09-10 02:26:13.834006", "step": 6, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.862051", "step": 6, "epoch": 1 }, { "type": "loss", "content": 0.08074257522821426, "timestamp": "2025-09-10 02:26:13.863266", "step": 7, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:13.891901", "step": 7, "epoch": 1 }, { "type": "loss", "content": 0.09530693292617798, "timestamp": "2025-09-10 02:26:13.915100", "step": 8, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:13.965999", "step": 8, "epoch": 1 }, { "type": "loss", "content": 0.05420571565628052, "timestamp": "2025-09-10 02:26:13.967577", "step": 9, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:14.014487", "step": 9, "epoch": 1 }, { "type": "loss", "content": 0.04895346984267235, "timestamp": "2025-09-10 02:26:14.015879", "step": 10, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.044805", "step": 10, "epoch": 1 }, { "type": "loss", "content": 0.05694868788123131, "timestamp": "2025-09-10 02:26:14.046849", "step": 11, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.075117", "step": 11, "epoch": 1 }, { "type": "loss", "content": 0.05808507651090622, "timestamp": "2025-09-10 02:26:14.098051", "step": 12, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:14.126896", "step": 12, "epoch": 1 }, { "type": "loss", "content": 0.06551863998174667, "timestamp": "2025-09-10 02:26:14.128469", "step": 13, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.156688", "step": 13, "epoch": 1 }, { "type": "loss", "content": 0.054176900535821915, "timestamp": "2025-09-10 02:26:14.158099", "step": 14, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.186202", "step": 14, "epoch": 1 }, { "type": "loss", "content": 0.0339355394244194, "timestamp": "2025-09-10 02:26:14.187762", "step": 15, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.215858", "step": 15, "epoch": 1 }, { "type": "loss", "content": 0.041469670832157135, "timestamp": "2025-09-10 02:26:14.238658", "step": 16, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.267079", "step": 16, "epoch": 1 }, { "type": "loss", "content": 0.06128307431936264, "timestamp": "2025-09-10 02:26:14.268524", "step": 17, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:14.297136", "step": 17, "epoch": 1 }, { "type": "loss", "content": 0.03547512739896774, "timestamp": "2025-09-10 02:26:14.298573", "step": 18, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.327130", "step": 18, "epoch": 1 }, { "type": "loss", "content": 0.08370207995176315, "timestamp": "2025-09-10 02:26:14.328766", "step": 19, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.356877", "step": 19, "epoch": 1 }, { "type": "loss", "content": 0.050863347947597504, "timestamp": "2025-09-10 02:26:14.379643", "step": 20, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:14.412795", "step": 20, "epoch": 1 }, { "type": "loss", "content": 0.0537407211959362, "timestamp": "2025-09-10 02:26:14.414235", "step": 21, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.442911", "step": 21, "epoch": 1 }, { "type": "loss", "content": 0.05583806708455086, "timestamp": "2025-09-10 02:26:14.444284", "step": 22, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.473247", "step": 22, "epoch": 1 }, { "type": "loss", "content": 0.04571468010544777, "timestamp": "2025-09-10 02:26:14.475403", "step": 23, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.504448", "step": 23, "epoch": 1 }, { "type": "loss", "content": 0.041455768048763275, "timestamp": "2025-09-10 02:26:14.527447", "step": 24, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.555519", "step": 24, "epoch": 1 }, { "type": "loss", "content": 0.04378771409392357, "timestamp": "2025-09-10 02:26:14.557068", "step": 25, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.585593", "step": 25, "epoch": 1 }, { "type": "loss", "content": 0.04339175671339035, "timestamp": "2025-09-10 02:26:14.587165", "step": 26, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:14.615648", "step": 26, "epoch": 1 }, { "type": "loss", "content": 0.04379519447684288, "timestamp": "2025-09-10 02:26:14.617229", "step": 27, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.645665", "step": 27, "epoch": 1 }, { "type": "loss", "content": 0.041180964559316635, "timestamp": "2025-09-10 02:26:14.668564", "step": 28, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.697113", "step": 28, "epoch": 1 }, { "type": "loss", "content": 0.022096460685133934, "timestamp": "2025-09-10 02:26:14.698549", "step": 29, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.726836", "step": 29, "epoch": 1 }, { "type": "loss", "content": 0.053119901567697525, "timestamp": "2025-09-10 02:26:14.728042", "step": 30, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.756017", "step": 30, "epoch": 1 }, { "type": "loss", "content": 0.031400687992572784, "timestamp": "2025-09-10 02:26:14.757476", "step": 31, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.785833", "step": 31, "epoch": 1 }, { "type": "loss", "content": 0.045110367238521576, "timestamp": "2025-09-10 02:26:14.808980", "step": 32, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.837430", "step": 32, "epoch": 1 }, { "type": "loss", "content": 0.054868753999471664, "timestamp": "2025-09-10 02:26:14.839041", "step": 33, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:14.867358", "step": 33, "epoch": 1 }, { "type": "loss", "content": 0.0636897161602974, "timestamp": "2025-09-10 02:26:14.868799", "step": 34, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.897213", "step": 34, "epoch": 1 }, { "type": "loss", "content": 0.04060392826795578, "timestamp": "2025-09-10 02:26:14.898677", "step": 35, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.927090", "step": 35, "epoch": 1 }, { "type": "loss", "content": 0.04056873545050621, "timestamp": "2025-09-10 02:26:14.949950", "step": 36, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:14.979350", "step": 36, "epoch": 1 }, { "type": "loss", "content": 0.03306897357106209, "timestamp": "2025-09-10 02:26:14.980982", "step": 37, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.010152", "step": 37, "epoch": 1 }, { "type": "loss", "content": 0.037819407880306244, "timestamp": "2025-09-10 02:26:15.011768", "step": 38, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.040529", "step": 38, "epoch": 1 }, { "type": "loss", "content": 0.02081454172730446, "timestamp": "2025-09-10 02:26:15.042188", "step": 39, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.071219", "step": 39, "epoch": 1 }, { "type": "loss", "content": 0.03982502594590187, "timestamp": "2025-09-10 02:26:15.094237", "step": 40, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.122803", "step": 40, "epoch": 1 }, { "type": "loss", "content": 0.04330352321267128, "timestamp": "2025-09-10 02:26:15.124383", "step": 41, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:15.153283", "step": 41, "epoch": 1 }, { "type": "loss", "content": 0.03618653863668442, "timestamp": "2025-09-10 02:26:15.154850", "step": 42, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.183447", "step": 42, "epoch": 1 }, { "type": "loss", "content": 0.038391124457120895, "timestamp": "2025-09-10 02:26:15.185005", "step": 43, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.213025", "step": 43, "epoch": 1 }, { "type": "loss", "content": 0.08018808811903, "timestamp": "2025-09-10 02:26:15.236052", "step": 44, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.264797", "step": 44, "epoch": 1 }, { "type": "loss", "content": 0.02720540761947632, "timestamp": "2025-09-10 02:26:15.266308", "step": 45, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:15.294802", "step": 45, "epoch": 1 }, { "type": "loss", "content": 0.05708106607198715, "timestamp": "2025-09-10 02:26:15.296254", "step": 46, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.324914", "step": 46, "epoch": 1 }, { "type": "loss", "content": 0.03747594356536865, "timestamp": "2025-09-10 02:26:15.326435", "step": 47, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.355563", "step": 47, "epoch": 1 }, { "type": "loss", "content": 0.0383264534175396, "timestamp": "2025-09-10 02:26:15.378617", "step": 48, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.407519", "step": 48, "epoch": 1 }, { "type": "loss", "content": 0.052838169038295746, "timestamp": "2025-09-10 02:26:15.408927", "step": 49, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:15.437592", "step": 49, "epoch": 1 }, { "type": "loss", "content": 0.04840589687228203, "timestamp": "2025-09-10 02:26:15.439012", "step": 50, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:15.467498", "step": 50, "epoch": 1 }, { "type": "loss", "content": 0.08914273232221603, "timestamp": "2025-09-10 02:26:15.468930", "step": 51, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:26:15.503794", "step": 51, "epoch": 1 }, { "type": "loss", "content": 0.02010265178978443, "timestamp": "2025-09-10 02:26:15.526986", "step": 52, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:15.557385", "step": 52, "epoch": 1 }, { "type": "loss", "content": 0.09015186876058578, "timestamp": "2025-09-10 02:26:15.559220", "step": 53, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.587952", "step": 53, "epoch": 1 }, { "type": "loss", "content": 0.015328974463045597, "timestamp": "2025-09-10 02:26:15.589775", "step": 54, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.618275", "step": 54, "epoch": 1 }, { "type": "loss", "content": 0.035104621201753616, "timestamp": "2025-09-10 02:26:15.620102", "step": 55, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:15.648987", "step": 55, "epoch": 1 }, { "type": "loss", "content": 0.056207407265901566, "timestamp": "2025-09-10 02:26:15.672216", "step": 56, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.701206", "step": 56, "epoch": 1 }, { "type": "loss", "content": 0.009822872467339039, "timestamp": "2025-09-10 02:26:15.703009", "step": 57, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.731703", "step": 57, "epoch": 1 }, { "type": "loss", "content": 0.04551895335316658, "timestamp": "2025-09-10 02:26:15.733492", "step": 58, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:15.762284", "step": 58, "epoch": 1 }, { "type": "loss", "content": 0.05960583686828613, "timestamp": "2025-09-10 02:26:15.764183", "step": 59, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.792963", "step": 59, "epoch": 1 }, { "type": "loss", "content": 0.0849723145365715, "timestamp": "2025-09-10 02:26:15.816148", "step": 60, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.845084", "step": 60, "epoch": 1 }, { "type": "loss", "content": 0.01958092674612999, "timestamp": "2025-09-10 02:26:15.847104", "step": 61, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:15.875888", "step": 61, "epoch": 1 }, { "type": "loss", "content": 0.04763122275471687, "timestamp": "2025-09-10 02:26:15.877522", "step": 62, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.906937", "step": 62, "epoch": 1 }, { "type": "loss", "content": 0.05476832017302513, "timestamp": "2025-09-10 02:26:15.908576", "step": 63, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.936928", "step": 63, "epoch": 1 }, { "type": "loss", "content": 0.043183039873838425, "timestamp": "2025-09-10 02:26:15.959969", "step": 64, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:15.989583", "step": 64, "epoch": 1 }, { "type": "loss", "content": 0.06260648369789124, "timestamp": "2025-09-10 02:26:15.991986", "step": 65, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.020682", "step": 65, "epoch": 1 }, { "type": "loss", "content": 0.024250363931059837, "timestamp": "2025-09-10 02:26:16.022485", "step": 66, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.051140", "step": 66, "epoch": 1 }, { "type": "loss", "content": 0.024692872539162636, "timestamp": "2025-09-10 02:26:16.052889", "step": 67, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.084626", "step": 67, "epoch": 1 }, { "type": "loss", "content": 0.07003824412822723, "timestamp": "2025-09-10 02:26:16.107891", "step": 68, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:16.136698", "step": 68, "epoch": 1 }, { "type": "loss", "content": 0.11602479964494705, "timestamp": "2025-09-10 02:26:16.138083", "step": 69, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.167096", "step": 69, "epoch": 1 }, { "type": "loss", "content": 0.0395672507584095, "timestamp": "2025-09-10 02:26:16.168597", "step": 70, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.197674", "step": 70, "epoch": 1 }, { "type": "loss", "content": 0.05587854981422424, "timestamp": "2025-09-10 02:26:16.199052", "step": 71, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:16.228575", "step": 71, "epoch": 1 }, { "type": "loss", "content": 0.016235610470175743, "timestamp": "2025-09-10 02:26:16.251865", "step": 72, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.281543", "step": 72, "epoch": 1 }, { "type": "loss", "content": 0.04589106887578964, "timestamp": "2025-09-10 02:26:16.283174", "step": 73, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.313168", "step": 73, "epoch": 1 }, { "type": "loss", "content": 0.07501351088285446, "timestamp": "2025-09-10 02:26:16.314760", "step": 74, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:16.344629", "step": 74, "epoch": 1 }, { "type": "loss", "content": 0.04023617133498192, "timestamp": "2025-09-10 02:26:16.346140", "step": 75, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.375324", "step": 75, "epoch": 1 }, { "type": "loss", "content": 0.043087732046842575, "timestamp": "2025-09-10 02:26:16.398432", "step": 76, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.427916", "step": 76, "epoch": 1 }, { "type": "loss", "content": 0.07063813507556915, "timestamp": "2025-09-10 02:26:16.429310", "step": 77, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.458330", "step": 77, "epoch": 1 }, { "type": "loss", "content": 0.07721427828073502, "timestamp": "2025-09-10 02:26:16.459764", "step": 78, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:16.489500", "step": 78, "epoch": 1 }, { "type": "loss", "content": 0.08201445639133453, "timestamp": "2025-09-10 02:26:16.490919", "step": 79, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.520308", "step": 79, "epoch": 1 }, { "type": "loss", "content": 0.037445347756147385, "timestamp": "2025-09-10 02:26:16.543592", "step": 80, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:26:16.572617", "step": 80, "epoch": 1 }, { "type": "loss", "content": 0.03383129462599754, "timestamp": "2025-09-10 02:26:16.574306", "step": 81, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.603888", "step": 81, "epoch": 1 }, { "type": "loss", "content": 0.06920628994703293, "timestamp": "2025-09-10 02:26:16.605804", "step": 82, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.634788", "step": 82, "epoch": 1 }, { "type": "loss", "content": 0.057791560888290405, "timestamp": "2025-09-10 02:26:16.636543", "step": 83, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.665235", "step": 83, "epoch": 1 }, { "type": "loss", "content": 0.04435967653989792, "timestamp": "2025-09-10 02:26:16.688516", "step": 84, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.717461", "step": 84, "epoch": 1 }, { "type": "loss", "content": 0.03706847503781319, "timestamp": "2025-09-10 02:26:16.719282", "step": 85, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.748172", "step": 85, "epoch": 1 }, { "type": "loss", "content": 0.031112460419535637, "timestamp": "2025-09-10 02:26:16.749896", "step": 86, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.778525", "step": 86, "epoch": 1 }, { "type": "loss", "content": 0.06309755146503448, "timestamp": "2025-09-10 02:26:16.780175", "step": 87, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.809312", "step": 87, "epoch": 1 }, { "type": "loss", "content": 0.04229046776890755, "timestamp": "2025-09-10 02:26:16.833521", "step": 88, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.862018", "step": 88, "epoch": 1 }, { "type": "loss", "content": 0.02340858057141304, "timestamp": "2025-09-10 02:26:16.863661", "step": 89, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:16.891912", "step": 89, "epoch": 1 }, { "type": "loss", "content": 0.05444183200597763, "timestamp": "2025-09-10 02:26:16.893599", "step": 90, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:16.921943", "step": 90, "epoch": 1 }, { "type": "loss", "content": 0.03168310970067978, "timestamp": "2025-09-10 02:26:16.923859", "step": 91, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:16.952720", "step": 91, "epoch": 1 }, { "type": "loss", "content": 0.0278034545481205, "timestamp": "2025-09-10 02:26:16.976018", "step": 92, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.005150", "step": 92, "epoch": 1 }, { "type": "loss", "content": 0.03337569162249565, "timestamp": "2025-09-10 02:26:17.006996", "step": 93, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.035422", "step": 93, "epoch": 1 }, { "type": "loss", "content": 0.04794316738843918, "timestamp": "2025-09-10 02:26:17.037023", "step": 94, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.065605", "step": 94, "epoch": 1 }, { "type": "loss", "content": 0.04414382576942444, "timestamp": "2025-09-10 02:26:17.067204", "step": 95, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.095901", "step": 95, "epoch": 1 }, { "type": "loss", "content": 0.04380037635564804, "timestamp": "2025-09-10 02:26:17.119020", "step": 96, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:17.148381", "step": 96, "epoch": 1 }, { "type": "loss", "content": 0.04589477926492691, "timestamp": "2025-09-10 02:26:17.150063", "step": 97, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.178477", "step": 97, "epoch": 1 }, { "type": "loss", "content": 0.03385995700955391, "timestamp": "2025-09-10 02:26:17.180652", "step": 98, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.208976", "step": 98, "epoch": 1 }, { "type": "loss", "content": 0.040779177099466324, "timestamp": "2025-09-10 02:26:17.210594", "step": 99, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:17.238862", "step": 99, "epoch": 1 }, { "type": "loss", "content": 0.036285847425460815, "timestamp": "2025-09-10 02:26:17.266483", "step": 100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.298194", "step": 100, "epoch": 1 }, { "type": "loss", "content": 0.026493841782212257, "timestamp": "2025-09-10 02:26:17.302159", "step": 101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.332146", "step": 101, "epoch": 1 }, { "type": "loss", "content": 0.02587372623383999, "timestamp": "2025-09-10 02:26:17.333544", "step": 102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:17.363048", "step": 102, "epoch": 1 }, { "type": "loss", "content": 0.04184553027153015, "timestamp": "2025-09-10 02:26:17.364496", "step": 103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.393032", "step": 103, "epoch": 1 }, { "type": "loss", "content": 0.04045234993100166, "timestamp": "2025-09-10 02:26:17.418098", "step": 104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:17.446652", "step": 104, "epoch": 1 }, { "type": "loss", "content": 0.015993589535355568, "timestamp": "2025-09-10 02:26:17.448264", "step": 105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.476805", "step": 105, "epoch": 1 }, { "type": "loss", "content": 0.03624787554144859, "timestamp": "2025-09-10 02:26:17.478193", "step": 106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.507475", "step": 106, "epoch": 1 }, { "type": "loss", "content": 0.03354557603597641, "timestamp": "2025-09-10 02:26:17.509125", "step": 107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.536951", "step": 107, "epoch": 1 }, { "type": "loss", "content": 0.05234936624765396, "timestamp": "2025-09-10 02:26:17.559899", "step": 108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.588316", "step": 108, "epoch": 1 }, { "type": "loss", "content": 0.026534216478466988, "timestamp": "2025-09-10 02:26:17.590009", "step": 109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.618304", "step": 109, "epoch": 1 }, { "type": "loss", "content": 0.05735252425074577, "timestamp": "2025-09-10 02:26:17.620170", "step": 110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:17.649166", "step": 110, "epoch": 1 }, { "type": "loss", "content": 0.053705159574747086, "timestamp": "2025-09-10 02:26:17.652293", "step": 111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.683236", "step": 111, "epoch": 1 }, { "type": "loss", "content": 0.044398847967386246, "timestamp": "2025-09-10 02:26:17.706556", "step": 112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.735203", "step": 112, "epoch": 1 }, { "type": "loss", "content": 0.03528786823153496, "timestamp": "2025-09-10 02:26:17.737352", "step": 113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:17.766273", "step": 113, "epoch": 1 }, { "type": "loss", "content": 0.06282095611095428, "timestamp": "2025-09-10 02:26:17.768071", "step": 114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.796683", "step": 114, "epoch": 1 }, { "type": "loss", "content": 0.06083959341049194, "timestamp": "2025-09-10 02:26:17.798374", "step": 115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.826424", "step": 115, "epoch": 1 }, { "type": "loss", "content": 0.04757341742515564, "timestamp": "2025-09-10 02:26:17.849588", "step": 116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:17.878164", "step": 116, "epoch": 1 }, { "type": "loss", "content": 0.04990594834089279, "timestamp": "2025-09-10 02:26:17.879835", "step": 117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.908676", "step": 117, "epoch": 1 }, { "type": "loss", "content": 0.03153015300631523, "timestamp": "2025-09-10 02:26:17.910502", "step": 118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:17.939098", "step": 118, "epoch": 1 }, { "type": "loss", "content": 0.03902817517518997, "timestamp": "2025-09-10 02:26:17.940863", "step": 119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:17.969439", "step": 119, "epoch": 1 }, { "type": "loss", "content": 0.03202887997031212, "timestamp": "2025-09-10 02:26:17.992527", "step": 120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.021926", "step": 120, "epoch": 1 }, { "type": "loss", "content": 0.02192959189414978, "timestamp": "2025-09-10 02:26:18.023304", "step": 121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.051518", "step": 121, "epoch": 1 }, { "type": "loss", "content": 0.028383001685142517, "timestamp": "2025-09-10 02:26:18.052841", "step": 122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.084660", "step": 122, "epoch": 1 }, { "type": "loss", "content": 0.059359509497880936, "timestamp": "2025-09-10 02:26:18.086135", "step": 123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.115037", "step": 123, "epoch": 1 }, { "type": "loss", "content": 0.03712543845176697, "timestamp": "2025-09-10 02:26:18.139005", "step": 124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.168086", "step": 124, "epoch": 1 }, { "type": "loss", "content": 0.07202344387769699, "timestamp": "2025-09-10 02:26:18.169297", "step": 125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.198484", "step": 125, "epoch": 1 }, { "type": "loss", "content": 0.019284551963210106, "timestamp": "2025-09-10 02:26:18.200772", "step": 126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.229103", "step": 126, "epoch": 1 }, { "type": "loss", "content": 0.04529854282736778, "timestamp": "2025-09-10 02:26:18.231013", "step": 127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.259682", "step": 127, "epoch": 1 }, { "type": "loss", "content": 0.036308206617832184, "timestamp": "2025-09-10 02:26:18.282605", "step": 128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.311567", "step": 128, "epoch": 1 }, { "type": "loss", "content": 0.06300818175077438, "timestamp": "2025-09-10 02:26:18.313266", "step": 129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.341832", "step": 129, "epoch": 1 }, { "type": "loss", "content": 0.02641741931438446, "timestamp": "2025-09-10 02:26:18.343609", "step": 130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:18.371963", "step": 130, "epoch": 1 }, { "type": "loss", "content": 0.03118242882192135, "timestamp": "2025-09-10 02:26:18.373788", "step": 131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.402383", "step": 131, "epoch": 1 }, { "type": "loss", "content": 0.026283519342541695, "timestamp": "2025-09-10 02:26:18.425517", "step": 132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.459956", "step": 132, "epoch": 1 }, { "type": "loss", "content": 0.016796765848994255, "timestamp": "2025-09-10 02:26:18.461783", "step": 133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.490694", "step": 133, "epoch": 1 }, { "type": "loss", "content": 0.011252478696405888, "timestamp": "2025-09-10 02:26:18.492227", "step": 134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.521295", "step": 134, "epoch": 1 }, { "type": "loss", "content": 0.028942221775650978, "timestamp": "2025-09-10 02:26:18.522889", "step": 135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.552693", "step": 135, "epoch": 1 }, { "type": "loss", "content": 0.01871657930314541, "timestamp": "2025-09-10 02:26:18.576080", "step": 136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:18.604924", "step": 136, "epoch": 1 }, { "type": "loss", "content": 0.04277872294187546, "timestamp": "2025-09-10 02:26:18.606623", "step": 137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.635283", "step": 137, "epoch": 1 }, { "type": "loss", "content": 0.01327602844685316, "timestamp": "2025-09-10 02:26:18.636935", "step": 138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:18.666162", "step": 138, "epoch": 1 }, { "type": "loss", "content": 0.040411483496427536, "timestamp": "2025-09-10 02:26:18.667696", "step": 139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.696329", "step": 139, "epoch": 1 }, { "type": "loss", "content": 0.012431403622031212, "timestamp": "2025-09-10 02:26:18.719107", "step": 140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.748303", "step": 140, "epoch": 1 }, { "type": "loss", "content": 0.022387627512216568, "timestamp": "2025-09-10 02:26:18.750077", "step": 141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.789074", "step": 141, "epoch": 1 }, { "type": "loss", "content": 0.061557136476039886, "timestamp": "2025-09-10 02:26:18.792868", "step": 142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.826710", "step": 142, "epoch": 1 }, { "type": "loss", "content": 0.023226479068398476, "timestamp": "2025-09-10 02:26:18.828500", "step": 143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.857576", "step": 143, "epoch": 1 }, { "type": "loss", "content": 0.02590755559504032, "timestamp": "2025-09-10 02:26:18.881140", "step": 144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.918310", "step": 144, "epoch": 1 }, { "type": "loss", "content": 0.024034647271037102, "timestamp": "2025-09-10 02:26:18.922067", "step": 145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:18.971480", "step": 145, "epoch": 1 }, { "type": "loss", "content": 0.024593573063611984, "timestamp": "2025-09-10 02:26:18.974677", "step": 146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:19.006397", "step": 146, "epoch": 1 }, { "type": "loss", "content": 0.03884469345211983, "timestamp": "2025-09-10 02:26:19.008010", "step": 147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:19.037056", "step": 147, "epoch": 1 }, { "type": "loss", "content": 0.0037036570720374584, "timestamp": "2025-09-10 02:26:19.060018", "step": 148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:19.088187", "step": 148, "epoch": 1 }, { "type": "loss", "content": 0.013467243872582912, "timestamp": "2025-09-10 02:26:19.089862", "step": 149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:19.118234", "step": 149, "epoch": 1 }, { "type": "loss", "content": 0.03569980338215828, "timestamp": "2025-09-10 02:26:19.119846", "step": 150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:19.148790", "step": 150, "epoch": 1 }, { "type": "loss", "content": 0.05327266454696655, "timestamp": "2025-09-10 02:26:19.150392", "step": 151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:19.178971", "step": 151, "epoch": 1 }, { "type": "loss", "content": 0.02637994848191738, "timestamp": "2025-09-10 02:26:19.202054", "step": 152, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:26:21.067095", "step": 152, "epoch": 1 }, { "type": "pplx", "content": 2095964.7926953253, "timestamp": "2025-09-10 02:26:21.068909", "step": 152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.097345", "step": 152, "epoch": 1 }, { "type": "loss", "content": 0.07849128544330597, "timestamp": "2025-09-10 02:26:21.098978", "step": 153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.128487", "step": 153, "epoch": 1 }, { "type": "loss", "content": 0.03526788949966431, "timestamp": "2025-09-10 02:26:21.130140", "step": 154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:21.159156", "step": 154, "epoch": 1 }, { "type": "loss", "content": 0.04433086887001991, "timestamp": "2025-09-10 02:26:21.160994", "step": 155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.189477", "step": 155, "epoch": 1 }, { "type": "loss", "content": 0.07645846158266068, "timestamp": "2025-09-10 02:26:21.212664", "step": 156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:21.241691", "step": 156, "epoch": 1 }, { "type": "loss", "content": 0.003905185032635927, "timestamp": "2025-09-10 02:26:21.243356", "step": 157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:21.271923", "step": 157, "epoch": 1 }, { "type": "loss", "content": 0.006804810371249914, "timestamp": "2025-09-10 02:26:21.273506", "step": 158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.302001", "step": 158, "epoch": 1 }, { "type": "loss", "content": 0.019923415035009384, "timestamp": "2025-09-10 02:26:21.303564", "step": 159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.332038", "step": 159, "epoch": 1 }, { "type": "loss", "content": 0.05792221054434776, "timestamp": "2025-09-10 02:26:21.355247", "step": 160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.384202", "step": 160, "epoch": 1 }, { "type": "loss", "content": 0.07608406245708466, "timestamp": "2025-09-10 02:26:21.385747", "step": 161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.414044", "step": 161, "epoch": 1 }, { "type": "loss", "content": 0.02290247194468975, "timestamp": "2025-09-10 02:26:21.415414", "step": 162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.444229", "step": 162, "epoch": 1 }, { "type": "loss", "content": 0.030202900990843773, "timestamp": "2025-09-10 02:26:21.445974", "step": 163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.474480", "step": 163, "epoch": 1 }, { "type": "loss", "content": 0.008085962384939194, "timestamp": "2025-09-10 02:26:21.497594", "step": 164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:21.526912", "step": 164, "epoch": 1 }, { "type": "loss", "content": 0.029565736651420593, "timestamp": "2025-09-10 02:26:21.528474", "step": 165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:21.557647", "step": 165, "epoch": 1 }, { "type": "loss", "content": 0.07050912082195282, "timestamp": "2025-09-10 02:26:21.559200", "step": 166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.587896", "step": 166, "epoch": 1 }, { "type": "loss", "content": 0.029368679970502853, "timestamp": "2025-09-10 02:26:21.589611", "step": 167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:21.617896", "step": 167, "epoch": 1 }, { "type": "loss", "content": 0.03375962749123573, "timestamp": "2025-09-10 02:26:21.641349", "step": 168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:21.670062", "step": 168, "epoch": 1 }, { "type": "loss", "content": 0.051859911531209946, "timestamp": "2025-09-10 02:26:21.671888", "step": 169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.700528", "step": 169, "epoch": 1 }, { "type": "loss", "content": 0.03229187801480293, "timestamp": "2025-09-10 02:26:21.702132", "step": 170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.730879", "step": 170, "epoch": 1 }, { "type": "loss", "content": 0.037152018398046494, "timestamp": "2025-09-10 02:26:21.732873", "step": 171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:21.762359", "step": 171, "epoch": 1 }, { "type": "loss", "content": 0.044093675911426544, "timestamp": "2025-09-10 02:26:21.785669", "step": 172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.814297", "step": 172, "epoch": 1 }, { "type": "loss", "content": 0.04556509107351303, "timestamp": "2025-09-10 02:26:21.816145", "step": 173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.844590", "step": 173, "epoch": 1 }, { "type": "loss", "content": 0.039365023374557495, "timestamp": "2025-09-10 02:26:21.846394", "step": 174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.875229", "step": 174, "epoch": 1 }, { "type": "loss", "content": 0.05335281416773796, "timestamp": "2025-09-10 02:26:21.876869", "step": 175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.905172", "step": 175, "epoch": 1 }, { "type": "loss", "content": 0.03890601545572281, "timestamp": "2025-09-10 02:26:21.928384", "step": 176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.957519", "step": 176, "epoch": 1 }, { "type": "loss", "content": 0.03906140476465225, "timestamp": "2025-09-10 02:26:21.959111", "step": 177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:21.988085", "step": 177, "epoch": 1 }, { "type": "loss", "content": 0.019950097426772118, "timestamp": "2025-09-10 02:26:21.990672", "step": 178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.019273", "step": 178, "epoch": 1 }, { "type": "loss", "content": 0.004369103349745274, "timestamp": "2025-09-10 02:26:22.021059", "step": 179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.049632", "step": 179, "epoch": 1 }, { "type": "loss", "content": 0.06852971017360687, "timestamp": "2025-09-10 02:26:22.073099", "step": 180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:22.102650", "step": 180, "epoch": 1 }, { "type": "loss", "content": 0.005697351414710283, "timestamp": "2025-09-10 02:26:22.104617", "step": 181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.133331", "step": 181, "epoch": 1 }, { "type": "loss", "content": 0.009286266751587391, "timestamp": "2025-09-10 02:26:22.134685", "step": 182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.162682", "step": 182, "epoch": 1 }, { "type": "loss", "content": 0.052384261041879654, "timestamp": "2025-09-10 02:26:22.164082", "step": 183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:22.192832", "step": 183, "epoch": 1 }, { "type": "loss", "content": 0.03431450203061104, "timestamp": "2025-09-10 02:26:22.215938", "step": 184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.245652", "step": 184, "epoch": 1 }, { "type": "loss", "content": 0.05092969536781311, "timestamp": "2025-09-10 02:26:22.246982", "step": 185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.275224", "step": 185, "epoch": 1 }, { "type": "loss", "content": 0.022450143471360207, "timestamp": "2025-09-10 02:26:22.276665", "step": 186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.305883", "step": 186, "epoch": 1 }, { "type": "loss", "content": 0.022179920226335526, "timestamp": "2025-09-10 02:26:22.307836", "step": 187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.336793", "step": 187, "epoch": 1 }, { "type": "loss", "content": 0.0721798688173294, "timestamp": "2025-09-10 02:26:22.360332", "step": 188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.389644", "step": 188, "epoch": 1 }, { "type": "loss", "content": 0.032928816974163055, "timestamp": "2025-09-10 02:26:22.391307", "step": 189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.420074", "step": 189, "epoch": 1 }, { "type": "loss", "content": 0.023632245138287544, "timestamp": "2025-09-10 02:26:22.421956", "step": 190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.450670", "step": 190, "epoch": 1 }, { "type": "loss", "content": 0.05820164084434509, "timestamp": "2025-09-10 02:26:22.452548", "step": 191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.481380", "step": 191, "epoch": 1 }, { "type": "loss", "content": 0.05574128404259682, "timestamp": "2025-09-10 02:26:22.504754", "step": 192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.533254", "step": 192, "epoch": 1 }, { "type": "loss", "content": 0.0478353314101696, "timestamp": "2025-09-10 02:26:22.534978", "step": 193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.563463", "step": 193, "epoch": 1 }, { "type": "loss", "content": 0.030672820284962654, "timestamp": "2025-09-10 02:26:22.565396", "step": 194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.594615", "step": 194, "epoch": 1 }, { "type": "loss", "content": 0.032667286694049835, "timestamp": "2025-09-10 02:26:22.596297", "step": 195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.624958", "step": 195, "epoch": 1 }, { "type": "loss", "content": 0.03710975870490074, "timestamp": "2025-09-10 02:26:22.648364", "step": 196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:22.677810", "step": 196, "epoch": 1 }, { "type": "loss", "content": 0.03341403231024742, "timestamp": "2025-09-10 02:26:22.679287", "step": 197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.708573", "step": 197, "epoch": 1 }, { "type": "loss", "content": 0.05037161707878113, "timestamp": "2025-09-10 02:26:22.710060", "step": 198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.738359", "step": 198, "epoch": 1 }, { "type": "loss", "content": 0.07397724688053131, "timestamp": "2025-09-10 02:26:22.739819", "step": 199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.767899", "step": 199, "epoch": 1 }, { "type": "loss", "content": 0.059347983449697495, "timestamp": "2025-09-10 02:26:22.791114", "step": 200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.819227", "step": 200, "epoch": 1 }, { "type": "loss", "content": 0.014053313061594963, "timestamp": "2025-09-10 02:26:22.820644", "step": 201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:22.848434", "step": 201, "epoch": 1 }, { "type": "loss", "content": 0.02120036818087101, "timestamp": "2025-09-10 02:26:22.849833", "step": 202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.878051", "step": 202, "epoch": 1 }, { "type": "loss", "content": 0.03766484931111336, "timestamp": "2025-09-10 02:26:22.879419", "step": 203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.908173", "step": 203, "epoch": 1 }, { "type": "loss", "content": 0.04409381002187729, "timestamp": "2025-09-10 02:26:22.931082", "step": 204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.959405", "step": 204, "epoch": 1 }, { "type": "loss", "content": 0.009215420112013817, "timestamp": "2025-09-10 02:26:22.960760", "step": 205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:22.988966", "step": 205, "epoch": 1 }, { "type": "loss", "content": 0.052273061126470566, "timestamp": "2025-09-10 02:26:22.990152", "step": 206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.018590", "step": 206, "epoch": 1 }, { "type": "loss", "content": 0.0432240329682827, "timestamp": "2025-09-10 02:26:23.020104", "step": 207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.048231", "step": 207, "epoch": 1 }, { "type": "loss", "content": 0.034780390560626984, "timestamp": "2025-09-10 02:26:23.071521", "step": 208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.100987", "step": 208, "epoch": 1 }, { "type": "loss", "content": 0.04283388704061508, "timestamp": "2025-09-10 02:26:23.102834", "step": 209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.131643", "step": 209, "epoch": 1 }, { "type": "loss", "content": 0.022248506546020508, "timestamp": "2025-09-10 02:26:23.133269", "step": 210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.162059", "step": 210, "epoch": 1 }, { "type": "loss", "content": 0.05728680640459061, "timestamp": "2025-09-10 02:26:23.163686", "step": 211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.192244", "step": 211, "epoch": 1 }, { "type": "loss", "content": 0.027303336188197136, "timestamp": "2025-09-10 02:26:23.215400", "step": 212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:23.244106", "step": 212, "epoch": 1 }, { "type": "loss", "content": 0.04928375035524368, "timestamp": "2025-09-10 02:26:23.245604", "step": 213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.274221", "step": 213, "epoch": 1 }, { "type": "loss", "content": 0.05382887274026871, "timestamp": "2025-09-10 02:26:23.275552", "step": 214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:23.303867", "step": 214, "epoch": 1 }, { "type": "loss", "content": 0.05966060236096382, "timestamp": "2025-09-10 02:26:23.305555", "step": 215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.333772", "step": 215, "epoch": 1 }, { "type": "loss", "content": 0.02548491768538952, "timestamp": "2025-09-10 02:26:23.357028", "step": 216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.385609", "step": 216, "epoch": 1 }, { "type": "loss", "content": 0.023930877447128296, "timestamp": "2025-09-10 02:26:23.387413", "step": 217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.415962", "step": 217, "epoch": 1 }, { "type": "loss", "content": 0.04809613898396492, "timestamp": "2025-09-10 02:26:23.417366", "step": 218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.446047", "step": 218, "epoch": 1 }, { "type": "loss", "content": 0.05492384359240532, "timestamp": "2025-09-10 02:26:23.447487", "step": 219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.475862", "step": 219, "epoch": 1 }, { "type": "loss", "content": 0.0793657973408699, "timestamp": "2025-09-10 02:26:23.499124", "step": 220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:23.527543", "step": 220, "epoch": 1 }, { "type": "loss", "content": 0.02191893383860588, "timestamp": "2025-09-10 02:26:23.529118", "step": 221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.557487", "step": 221, "epoch": 1 }, { "type": "loss", "content": 0.04995134845376015, "timestamp": "2025-09-10 02:26:23.559136", "step": 222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.587667", "step": 222, "epoch": 1 }, { "type": "loss", "content": 0.05806097760796547, "timestamp": "2025-09-10 02:26:23.589189", "step": 223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.617464", "step": 223, "epoch": 1 }, { "type": "loss", "content": 0.0502280592918396, "timestamp": "2025-09-10 02:26:23.640899", "step": 224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:23.669794", "step": 224, "epoch": 1 }, { "type": "loss", "content": 0.05602055415511131, "timestamp": "2025-09-10 02:26:23.671377", "step": 225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.699700", "step": 225, "epoch": 1 }, { "type": "loss", "content": 0.02692429907619953, "timestamp": "2025-09-10 02:26:23.701562", "step": 226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:23.729516", "step": 226, "epoch": 1 }, { "type": "loss", "content": 0.07570789754390717, "timestamp": "2025-09-10 02:26:23.730929", "step": 227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.758721", "step": 227, "epoch": 1 }, { "type": "loss", "content": 0.03995998576283455, "timestamp": "2025-09-10 02:26:23.782155", "step": 228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:23.810338", "step": 228, "epoch": 1 }, { "type": "loss", "content": 0.023368533700704575, "timestamp": "2025-09-10 02:26:23.812031", "step": 229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.840340", "step": 229, "epoch": 1 }, { "type": "loss", "content": 0.05306321382522583, "timestamp": "2025-09-10 02:26:23.841766", "step": 230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.869739", "step": 230, "epoch": 1 }, { "type": "loss", "content": 0.017180833965539932, "timestamp": "2025-09-10 02:26:23.871459", "step": 231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:23.900307", "step": 231, "epoch": 1 }, { "type": "loss", "content": 0.02015542984008789, "timestamp": "2025-09-10 02:26:23.923502", "step": 232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.952647", "step": 232, "epoch": 1 }, { "type": "loss", "content": 0.043472178280353546, "timestamp": "2025-09-10 02:26:23.954655", "step": 233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:23.982808", "step": 233, "epoch": 1 }, { "type": "loss", "content": 0.042272455990314484, "timestamp": "2025-09-10 02:26:23.984249", "step": 234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:24.012630", "step": 234, "epoch": 1 }, { "type": "loss", "content": 0.0589950866997242, "timestamp": "2025-09-10 02:26:24.014333", "step": 235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.042538", "step": 235, "epoch": 1 }, { "type": "loss", "content": 0.012602618895471096, "timestamp": "2025-09-10 02:26:24.065811", "step": 236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:24.094518", "step": 236, "epoch": 1 }, { "type": "loss", "content": 0.02325502410531044, "timestamp": "2025-09-10 02:26:24.096016", "step": 237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.124289", "step": 237, "epoch": 1 }, { "type": "loss", "content": 0.019194353371858597, "timestamp": "2025-09-10 02:26:24.126113", "step": 238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.154442", "step": 238, "epoch": 1 }, { "type": "loss", "content": 0.03466707095503807, "timestamp": "2025-09-10 02:26:24.156128", "step": 239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.184472", "step": 239, "epoch": 1 }, { "type": "loss", "content": 0.027127530425786972, "timestamp": "2025-09-10 02:26:24.207676", "step": 240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.236747", "step": 240, "epoch": 1 }, { "type": "loss", "content": 0.010394366458058357, "timestamp": "2025-09-10 02:26:24.239310", "step": 241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.268133", "step": 241, "epoch": 1 }, { "type": "loss", "content": 0.05274328589439392, "timestamp": "2025-09-10 02:26:24.269714", "step": 242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:24.298342", "step": 242, "epoch": 1 }, { "type": "loss", "content": 0.03417865186929703, "timestamp": "2025-09-10 02:26:24.300067", "step": 243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.328345", "step": 243, "epoch": 1 }, { "type": "loss", "content": 0.022119751200079918, "timestamp": "2025-09-10 02:26:24.351448", "step": 244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.379231", "step": 244, "epoch": 1 }, { "type": "loss", "content": 0.05870979651808739, "timestamp": "2025-09-10 02:26:24.380881", "step": 245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.408380", "step": 245, "epoch": 1 }, { "type": "loss", "content": 0.042090386152267456, "timestamp": "2025-09-10 02:26:24.409965", "step": 246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.437764", "step": 246, "epoch": 1 }, { "type": "loss", "content": 0.04785066470503807, "timestamp": "2025-09-10 02:26:24.439297", "step": 247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:24.467589", "step": 247, "epoch": 1 }, { "type": "loss", "content": 0.03291523829102516, "timestamp": "2025-09-10 02:26:24.490680", "step": 248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.519144", "step": 248, "epoch": 1 }, { "type": "loss", "content": 0.04091138020157814, "timestamp": "2025-09-10 02:26:24.520799", "step": 249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.548822", "step": 249, "epoch": 1 }, { "type": "loss", "content": 0.03316140174865723, "timestamp": "2025-09-10 02:26:24.550258", "step": 250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:24.578359", "step": 250, "epoch": 1 }, { "type": "loss", "content": 0.034854013472795486, "timestamp": "2025-09-10 02:26:24.579803", "step": 251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.607541", "step": 251, "epoch": 1 }, { "type": "loss", "content": 0.06467428058385849, "timestamp": "2025-09-10 02:26:24.630608", "step": 252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:24.661119", "step": 252, "epoch": 1 }, { "type": "loss", "content": 0.0717066153883934, "timestamp": "2025-09-10 02:26:24.662639", "step": 253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.690575", "step": 253, "epoch": 1 }, { "type": "loss", "content": 0.03744388371706009, "timestamp": "2025-09-10 02:26:24.692103", "step": 254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.720162", "step": 254, "epoch": 1 }, { "type": "loss", "content": 0.034172169864177704, "timestamp": "2025-09-10 02:26:24.721671", "step": 255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.749322", "step": 255, "epoch": 1 }, { "type": "loss", "content": 0.04225926846265793, "timestamp": "2025-09-10 02:26:24.772543", "step": 256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:24.800634", "step": 256, "epoch": 1 }, { "type": "loss", "content": 0.040328167378902435, "timestamp": "2025-09-10 02:26:24.802285", "step": 257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.830388", "step": 257, "epoch": 1 }, { "type": "loss", "content": 0.038004156202077866, "timestamp": "2025-09-10 02:26:24.831991", "step": 258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.859825", "step": 258, "epoch": 1 }, { "type": "loss", "content": 0.022771785035729408, "timestamp": "2025-09-10 02:26:24.861514", "step": 259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.889773", "step": 259, "epoch": 1 }, { "type": "loss", "content": 0.03296079486608505, "timestamp": "2025-09-10 02:26:24.912867", "step": 260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.941504", "step": 260, "epoch": 1 }, { "type": "loss", "content": 0.04581866413354874, "timestamp": "2025-09-10 02:26:24.942995", "step": 261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:24.970835", "step": 261, "epoch": 1 }, { "type": "loss", "content": 0.024789679795503616, "timestamp": "2025-09-10 02:26:24.972438", "step": 262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.000486", "step": 262, "epoch": 1 }, { "type": "loss", "content": 0.03151870146393776, "timestamp": "2025-09-10 02:26:25.002080", "step": 263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.029876", "step": 263, "epoch": 1 }, { "type": "loss", "content": 0.04139602929353714, "timestamp": "2025-09-10 02:26:25.053004", "step": 264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.081238", "step": 264, "epoch": 1 }, { "type": "loss", "content": 0.03249174356460571, "timestamp": "2025-09-10 02:26:25.083009", "step": 265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:25.111405", "step": 265, "epoch": 1 }, { "type": "loss", "content": 0.00858510285615921, "timestamp": "2025-09-10 02:26:25.113002", "step": 266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.141828", "step": 266, "epoch": 1 }, { "type": "loss", "content": 0.013428745791316032, "timestamp": "2025-09-10 02:26:25.143369", "step": 267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.171649", "step": 267, "epoch": 1 }, { "type": "loss", "content": 0.02350384183228016, "timestamp": "2025-09-10 02:26:25.194926", "step": 268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.222959", "step": 268, "epoch": 1 }, { "type": "loss", "content": 0.0218183733522892, "timestamp": "2025-09-10 02:26:25.224586", "step": 269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.252694", "step": 269, "epoch": 1 }, { "type": "loss", "content": 0.06323713809251785, "timestamp": "2025-09-10 02:26:25.254091", "step": 270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.282153", "step": 270, "epoch": 1 }, { "type": "loss", "content": 0.014637206681072712, "timestamp": "2025-09-10 02:26:25.283831", "step": 271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.312251", "step": 271, "epoch": 1 }, { "type": "loss", "content": 0.04055526852607727, "timestamp": "2025-09-10 02:26:25.335143", "step": 272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:25.363879", "step": 272, "epoch": 1 }, { "type": "loss", "content": 0.01910637505352497, "timestamp": "2025-09-10 02:26:25.364992", "step": 273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.392997", "step": 273, "epoch": 1 }, { "type": "loss", "content": 0.04856686666607857, "timestamp": "2025-09-10 02:26:25.394622", "step": 274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.422567", "step": 274, "epoch": 1 }, { "type": "loss", "content": 0.04855852574110031, "timestamp": "2025-09-10 02:26:25.423932", "step": 275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:25.451794", "step": 275, "epoch": 1 }, { "type": "loss", "content": 0.014289570041000843, "timestamp": "2025-09-10 02:26:25.474936", "step": 276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:25.502817", "step": 276, "epoch": 1 }, { "type": "loss", "content": 0.10124220699071884, "timestamp": "2025-09-10 02:26:25.504467", "step": 277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.532587", "step": 277, "epoch": 1 }, { "type": "loss", "content": 0.027577316388487816, "timestamp": "2025-09-10 02:26:25.534069", "step": 278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.562061", "step": 278, "epoch": 1 }, { "type": "loss", "content": 0.09541350603103638, "timestamp": "2025-09-10 02:26:25.563767", "step": 279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:25.591997", "step": 279, "epoch": 1 }, { "type": "loss", "content": 0.04130064696073532, "timestamp": "2025-09-10 02:26:25.614892", "step": 280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.643227", "step": 280, "epoch": 1 }, { "type": "loss", "content": 0.024617092683911324, "timestamp": "2025-09-10 02:26:25.645047", "step": 281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.673124", "step": 281, "epoch": 1 }, { "type": "loss", "content": 0.04850497096776962, "timestamp": "2025-09-10 02:26:25.674963", "step": 282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.703218", "step": 282, "epoch": 1 }, { "type": "loss", "content": 0.02256542444229126, "timestamp": "2025-09-10 02:26:25.705824", "step": 283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.735526", "step": 283, "epoch": 1 }, { "type": "loss", "content": 0.03141311928629875, "timestamp": "2025-09-10 02:26:25.758424", "step": 284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:25.786285", "step": 284, "epoch": 1 }, { "type": "loss", "content": 0.02413778007030487, "timestamp": "2025-09-10 02:26:25.787884", "step": 285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.815599", "step": 285, "epoch": 1 }, { "type": "loss", "content": 0.04970148578286171, "timestamp": "2025-09-10 02:26:25.817228", "step": 286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.845182", "step": 286, "epoch": 1 }, { "type": "loss", "content": 0.0072316196747124195, "timestamp": "2025-09-10 02:26:25.846849", "step": 287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.874843", "step": 287, "epoch": 1 }, { "type": "loss", "content": 0.036510735750198364, "timestamp": "2025-09-10 02:26:25.897636", "step": 288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.925740", "step": 288, "epoch": 1 }, { "type": "loss", "content": 0.03437785059213638, "timestamp": "2025-09-10 02:26:25.927246", "step": 289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.955120", "step": 289, "epoch": 1 }, { "type": "loss", "content": 0.04750484228134155, "timestamp": "2025-09-10 02:26:25.956598", "step": 290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:25.984334", "step": 290, "epoch": 1 }, { "type": "loss", "content": 0.04326671361923218, "timestamp": "2025-09-10 02:26:25.985741", "step": 291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.013322", "step": 291, "epoch": 1 }, { "type": "loss", "content": 0.08769829571247101, "timestamp": "2025-09-10 02:26:26.036309", "step": 292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.064161", "step": 292, "epoch": 1 }, { "type": "loss", "content": 0.06923764199018478, "timestamp": "2025-09-10 02:26:26.065701", "step": 293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.093346", "step": 293, "epoch": 1 }, { "type": "loss", "content": 0.07286082208156586, "timestamp": "2025-09-10 02:26:26.094899", "step": 294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:26.123423", "step": 294, "epoch": 1 }, { "type": "loss", "content": 0.05491286516189575, "timestamp": "2025-09-10 02:26:26.124903", "step": 295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.152691", "step": 295, "epoch": 1 }, { "type": "loss", "content": 0.04864270240068436, "timestamp": "2025-09-10 02:26:26.176021", "step": 296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.204020", "step": 296, "epoch": 1 }, { "type": "loss", "content": 0.058525215834379196, "timestamp": "2025-09-10 02:26:26.205527", "step": 297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.233743", "step": 297, "epoch": 1 }, { "type": "loss", "content": 0.0449678897857666, "timestamp": "2025-09-10 02:26:26.235093", "step": 298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.262786", "step": 298, "epoch": 1 }, { "type": "loss", "content": 0.038681477308273315, "timestamp": "2025-09-10 02:26:26.264395", "step": 299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.292636", "step": 299, "epoch": 1 }, { "type": "loss", "content": 0.03218863531947136, "timestamp": "2025-09-10 02:26:26.315507", "step": 300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.343609", "step": 300, "epoch": 1 }, { "type": "loss", "content": 0.027742283418774605, "timestamp": "2025-09-10 02:26:26.344935", "step": 301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.372619", "step": 301, "epoch": 1 }, { "type": "loss", "content": 0.05454636365175247, "timestamp": "2025-09-10 02:26:26.374330", "step": 302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.402844", "step": 302, "epoch": 1 }, { "type": "loss", "content": 0.03219006583094597, "timestamp": "2025-09-10 02:26:26.404301", "step": 303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:26.432399", "step": 303, "epoch": 1 }, { "type": "loss", "content": 0.022633623331785202, "timestamp": "2025-09-10 02:26:26.455528", "step": 304, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:26:28.287099", "step": 304, "epoch": 1 }, { "type": "pplx", "content": 2404055.911940932, "timestamp": "2025-09-10 02:26:28.288565", "step": 304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.315301", "step": 304, "epoch": 1 }, { "type": "loss", "content": 0.06278765946626663, "timestamp": "2025-09-10 02:26:28.316596", "step": 305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.344481", "step": 305, "epoch": 1 }, { "type": "loss", "content": 0.05780908837914467, "timestamp": "2025-09-10 02:26:28.346105", "step": 306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.373737", "step": 306, "epoch": 1 }, { "type": "loss", "content": 0.0264920461922884, "timestamp": "2025-09-10 02:26:28.375032", "step": 307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.402551", "step": 307, "epoch": 1 }, { "type": "loss", "content": 0.0319293811917305, "timestamp": "2025-09-10 02:26:28.425268", "step": 308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:28.452946", "step": 308, "epoch": 1 }, { "type": "loss", "content": 0.03297847881913185, "timestamp": "2025-09-10 02:26:28.454275", "step": 309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.481988", "step": 309, "epoch": 1 }, { "type": "loss", "content": 0.025076931342482567, "timestamp": "2025-09-10 02:26:28.483306", "step": 310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:28.511329", "step": 310, "epoch": 1 }, { "type": "loss", "content": 0.044106367975473404, "timestamp": "2025-09-10 02:26:28.512730", "step": 311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.540373", "step": 311, "epoch": 1 }, { "type": "loss", "content": 0.01750769279897213, "timestamp": "2025-09-10 02:26:28.563519", "step": 312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.591282", "step": 312, "epoch": 1 }, { "type": "loss", "content": 0.05220300331711769, "timestamp": "2025-09-10 02:26:28.592883", "step": 313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:28.620596", "step": 313, "epoch": 1 }, { "type": "loss", "content": 0.039120737463235855, "timestamp": "2025-09-10 02:26:28.622019", "step": 314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.649734", "step": 314, "epoch": 1 }, { "type": "loss", "content": 0.0120754549279809, "timestamp": "2025-09-10 02:26:28.651290", "step": 315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.679231", "step": 315, "epoch": 1 }, { "type": "loss", "content": 0.018747910857200623, "timestamp": "2025-09-10 02:26:28.702267", "step": 316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:28.730022", "step": 316, "epoch": 1 }, { "type": "loss", "content": 0.026074785739183426, "timestamp": "2025-09-10 02:26:28.731214", "step": 317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.759159", "step": 317, "epoch": 1 }, { "type": "loss", "content": 0.043595410883426666, "timestamp": "2025-09-10 02:26:28.760489", "step": 318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.788423", "step": 318, "epoch": 1 }, { "type": "loss", "content": 0.026542428880929947, "timestamp": "2025-09-10 02:26:28.790054", "step": 319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.818275", "step": 319, "epoch": 1 }, { "type": "loss", "content": 0.027812568470835686, "timestamp": "2025-09-10 02:26:28.841584", "step": 320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.869599", "step": 320, "epoch": 1 }, { "type": "loss", "content": 0.04263043776154518, "timestamp": "2025-09-10 02:26:28.871266", "step": 321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:28.899649", "step": 321, "epoch": 1 }, { "type": "loss", "content": 0.05384966358542442, "timestamp": "2025-09-10 02:26:28.901071", "step": 322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:28.928534", "step": 322, "epoch": 1 }, { "type": "loss", "content": 0.02516597881913185, "timestamp": "2025-09-10 02:26:28.930222", "step": 323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:28.958245", "step": 323, "epoch": 1 }, { "type": "loss", "content": 0.026139909401535988, "timestamp": "2025-09-10 02:26:28.981131", "step": 324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:29.009112", "step": 324, "epoch": 1 }, { "type": "loss", "content": 0.06678090989589691, "timestamp": "2025-09-10 02:26:29.010501", "step": 325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.038574", "step": 325, "epoch": 1 }, { "type": "loss", "content": 0.04635394737124443, "timestamp": "2025-09-10 02:26:29.040095", "step": 326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.067779", "step": 326, "epoch": 1 }, { "type": "loss", "content": 0.02118229679763317, "timestamp": "2025-09-10 02:26:29.069291", "step": 327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.097315", "step": 327, "epoch": 1 }, { "type": "loss", "content": 0.02434566244482994, "timestamp": "2025-09-10 02:26:29.120209", "step": 328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.147958", "step": 328, "epoch": 1 }, { "type": "loss", "content": 0.05468794330954552, "timestamp": "2025-09-10 02:26:29.149141", "step": 329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.176857", "step": 329, "epoch": 1 }, { "type": "loss", "content": 0.036136846989393234, "timestamp": "2025-09-10 02:26:29.178020", "step": 330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.205772", "step": 330, "epoch": 1 }, { "type": "loss", "content": 0.0247384961694479, "timestamp": "2025-09-10 02:26:29.207121", "step": 331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:29.234765", "step": 331, "epoch": 1 }, { "type": "loss", "content": 0.06029272824525833, "timestamp": "2025-09-10 02:26:29.257530", "step": 332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.285051", "step": 332, "epoch": 1 }, { "type": "loss", "content": 0.023629697039723396, "timestamp": "2025-09-10 02:26:29.286641", "step": 333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.315728", "step": 333, "epoch": 1 }, { "type": "loss", "content": 0.04386547580361366, "timestamp": "2025-09-10 02:26:29.317136", "step": 334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.344895", "step": 334, "epoch": 1 }, { "type": "loss", "content": 0.03868928551673889, "timestamp": "2025-09-10 02:26:29.346552", "step": 335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.374354", "step": 335, "epoch": 1 }, { "type": "loss", "content": 0.049940530210733414, "timestamp": "2025-09-10 02:26:29.397501", "step": 336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.425829", "step": 336, "epoch": 1 }, { "type": "loss", "content": 0.0499514676630497, "timestamp": "2025-09-10 02:26:29.427480", "step": 337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.456414", "step": 337, "epoch": 1 }, { "type": "loss", "content": 0.037628378719091415, "timestamp": "2025-09-10 02:26:29.457910", "step": 338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.486295", "step": 338, "epoch": 1 }, { "type": "loss", "content": 0.05219903960824013, "timestamp": "2025-09-10 02:26:29.487984", "step": 339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:29.515915", "step": 339, "epoch": 1 }, { "type": "loss", "content": 0.021979600191116333, "timestamp": "2025-09-10 02:26:29.538793", "step": 340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.567219", "step": 340, "epoch": 1 }, { "type": "loss", "content": 0.011105814017355442, "timestamp": "2025-09-10 02:26:29.568345", "step": 341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.596107", "step": 341, "epoch": 1 }, { "type": "loss", "content": 0.031031189486384392, "timestamp": "2025-09-10 02:26:29.597302", "step": 342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.625049", "step": 342, "epoch": 1 }, { "type": "loss", "content": 0.04790028557181358, "timestamp": "2025-09-10 02:26:29.626178", "step": 343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.654164", "step": 343, "epoch": 1 }, { "type": "loss", "content": 0.023874282836914062, "timestamp": "2025-09-10 02:26:29.676876", "step": 344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:29.705452", "step": 344, "epoch": 1 }, { "type": "loss", "content": 0.03260252997279167, "timestamp": "2025-09-10 02:26:29.706882", "step": 345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.735009", "step": 345, "epoch": 1 }, { "type": "loss", "content": 0.041003767400979996, "timestamp": "2025-09-10 02:26:29.736889", "step": 346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.765007", "step": 346, "epoch": 1 }, { "type": "loss", "content": 0.036079805344343185, "timestamp": "2025-09-10 02:26:29.766550", "step": 347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.794840", "step": 347, "epoch": 1 }, { "type": "loss", "content": 0.0438341461122036, "timestamp": "2025-09-10 02:26:29.818061", "step": 348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.846857", "step": 348, "epoch": 1 }, { "type": "loss", "content": 0.011688505299389362, "timestamp": "2025-09-10 02:26:29.848472", "step": 349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.877246", "step": 349, "epoch": 1 }, { "type": "loss", "content": 0.055797092616558075, "timestamp": "2025-09-10 02:26:29.878766", "step": 350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.906809", "step": 350, "epoch": 1 }, { "type": "loss", "content": 0.05401964858174324, "timestamp": "2025-09-10 02:26:29.908331", "step": 351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.936268", "step": 351, "epoch": 1 }, { "type": "loss", "content": 0.03948580473661423, "timestamp": "2025-09-10 02:26:29.959372", "step": 352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:29.987192", "step": 352, "epoch": 1 }, { "type": "loss", "content": 0.046306490898132324, "timestamp": "2025-09-10 02:26:29.988639", "step": 353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.016659", "step": 353, "epoch": 1 }, { "type": "loss", "content": 0.045786309987306595, "timestamp": "2025-09-10 02:26:30.018133", "step": 354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:30.046106", "step": 354, "epoch": 1 }, { "type": "loss", "content": 0.03784593939781189, "timestamp": "2025-09-10 02:26:30.047503", "step": 355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.075909", "step": 355, "epoch": 1 }, { "type": "loss", "content": 0.055302880704402924, "timestamp": "2025-09-10 02:26:30.098942", "step": 356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.127583", "step": 356, "epoch": 1 }, { "type": "loss", "content": 0.04041001945734024, "timestamp": "2025-09-10 02:26:30.129300", "step": 357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.157581", "step": 357, "epoch": 1 }, { "type": "loss", "content": 0.028578907251358032, "timestamp": "2025-09-10 02:26:30.159428", "step": 358, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.187921", "step": 358, "epoch": 1 }, { "type": "loss", "content": 0.03948519378900528, "timestamp": "2025-09-10 02:26:30.189305", "step": 359, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.217249", "step": 359, "epoch": 1 }, { "type": "loss", "content": 0.03630781173706055, "timestamp": "2025-09-10 02:26:30.240087", "step": 360, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.268075", "step": 360, "epoch": 1 }, { "type": "loss", "content": 0.02764829434454441, "timestamp": "2025-09-10 02:26:30.269573", "step": 361, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.297606", "step": 361, "epoch": 1 }, { "type": "loss", "content": 0.04786473512649536, "timestamp": "2025-09-10 02:26:30.299181", "step": 362, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:30.327525", "step": 362, "epoch": 1 }, { "type": "loss", "content": 0.04653376340866089, "timestamp": "2025-09-10 02:26:30.328795", "step": 363, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.356643", "step": 363, "epoch": 1 }, { "type": "loss", "content": 0.05551743507385254, "timestamp": "2025-09-10 02:26:30.379504", "step": 364, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.407093", "step": 364, "epoch": 1 }, { "type": "loss", "content": 0.030665677040815353, "timestamp": "2025-09-10 02:26:30.408375", "step": 365, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.436064", "step": 365, "epoch": 1 }, { "type": "loss", "content": 0.04767214134335518, "timestamp": "2025-09-10 02:26:30.437275", "step": 366, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.464992", "step": 366, "epoch": 1 }, { "type": "loss", "content": 0.03540532663464546, "timestamp": "2025-09-10 02:26:30.467534", "step": 367, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.495618", "step": 367, "epoch": 1 }, { "type": "loss", "content": 0.015588260255753994, "timestamp": "2025-09-10 02:26:30.518804", "step": 368, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:30.547444", "step": 368, "epoch": 1 }, { "type": "loss", "content": 0.024345332756638527, "timestamp": "2025-09-10 02:26:30.549045", "step": 369, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.577048", "step": 369, "epoch": 1 }, { "type": "loss", "content": 0.03131551668047905, "timestamp": "2025-09-10 02:26:30.578875", "step": 370, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.607443", "step": 370, "epoch": 1 }, { "type": "loss", "content": 0.03911859542131424, "timestamp": "2025-09-10 02:26:30.608823", "step": 371, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.636561", "step": 371, "epoch": 1 }, { "type": "loss", "content": 0.02644316479563713, "timestamp": "2025-09-10 02:26:30.659794", "step": 372, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.688188", "step": 372, "epoch": 1 }, { "type": "loss", "content": 0.008261355571448803, "timestamp": "2025-09-10 02:26:30.689777", "step": 373, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.718062", "step": 373, "epoch": 1 }, { "type": "loss", "content": 0.014204383827745914, "timestamp": "2025-09-10 02:26:30.719783", "step": 374, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.747651", "step": 374, "epoch": 1 }, { "type": "loss", "content": 0.019057517871260643, "timestamp": "2025-09-10 02:26:30.749145", "step": 375, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.777136", "step": 375, "epoch": 1 }, { "type": "loss", "content": 0.03331819921731949, "timestamp": "2025-09-10 02:26:30.799951", "step": 376, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.827916", "step": 376, "epoch": 1 }, { "type": "loss", "content": 0.01622638665139675, "timestamp": "2025-09-10 02:26:30.829426", "step": 377, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.857424", "step": 377, "epoch": 1 }, { "type": "loss", "content": 0.012481695041060448, "timestamp": "2025-09-10 02:26:30.859089", "step": 378, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.887322", "step": 378, "epoch": 1 }, { "type": "loss", "content": 0.04068734496831894, "timestamp": "2025-09-10 02:26:30.889238", "step": 379, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.917607", "step": 379, "epoch": 1 }, { "type": "loss", "content": 0.019415130838751793, "timestamp": "2025-09-10 02:26:30.940485", "step": 380, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.968986", "step": 380, "epoch": 1 }, { "type": "loss", "content": 0.01683415099978447, "timestamp": "2025-09-10 02:26:30.970732", "step": 381, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:30.998774", "step": 381, "epoch": 1 }, { "type": "loss", "content": 0.02263670042157173, "timestamp": "2025-09-10 02:26:31.000342", "step": 382, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.028676", "step": 382, "epoch": 1 }, { "type": "loss", "content": 0.03133777156472206, "timestamp": "2025-09-10 02:26:31.030254", "step": 383, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.058664", "step": 383, "epoch": 1 }, { "type": "loss", "content": 0.026273494586348534, "timestamp": "2025-09-10 02:26:31.081769", "step": 384, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:31.110375", "step": 384, "epoch": 1 }, { "type": "loss", "content": 0.05546477437019348, "timestamp": "2025-09-10 02:26:31.111806", "step": 385, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.139794", "step": 385, "epoch": 1 }, { "type": "loss", "content": 0.02453637681901455, "timestamp": "2025-09-10 02:26:31.141375", "step": 386, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.169903", "step": 386, "epoch": 1 }, { "type": "loss", "content": 0.07124367356300354, "timestamp": "2025-09-10 02:26:31.171271", "step": 387, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:31.199266", "step": 387, "epoch": 1 }, { "type": "loss", "content": 0.04478440433740616, "timestamp": "2025-09-10 02:26:31.221897", "step": 388, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:31.249920", "step": 388, "epoch": 1 }, { "type": "loss", "content": 0.008734814822673798, "timestamp": "2025-09-10 02:26:31.251377", "step": 389, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:31.279567", "step": 389, "epoch": 1 }, { "type": "loss", "content": 0.045698363333940506, "timestamp": "2025-09-10 02:26:31.281077", "step": 390, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.308974", "step": 390, "epoch": 1 }, { "type": "loss", "content": 0.09974842518568039, "timestamp": "2025-09-10 02:26:31.310450", "step": 391, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.338890", "step": 391, "epoch": 1 }, { "type": "loss", "content": 0.01415980700403452, "timestamp": "2025-09-10 02:26:31.362052", "step": 392, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:31.391100", "step": 392, "epoch": 1 }, { "type": "loss", "content": 0.024075329303741455, "timestamp": "2025-09-10 02:26:31.393084", "step": 393, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:31.421457", "step": 393, "epoch": 1 }, { "type": "loss", "content": 0.008073990233242512, "timestamp": "2025-09-10 02:26:31.423444", "step": 394, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:31.451887", "step": 394, "epoch": 1 }, { "type": "loss", "content": 0.04262285679578781, "timestamp": "2025-09-10 02:26:31.453910", "step": 395, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.482300", "step": 395, "epoch": 1 }, { "type": "loss", "content": 0.02701367810368538, "timestamp": "2025-09-10 02:26:31.505723", "step": 396, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.535062", "step": 396, "epoch": 1 }, { "type": "loss", "content": 0.04329529032111168, "timestamp": "2025-09-10 02:26:31.537661", "step": 397, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.566347", "step": 397, "epoch": 1 }, { "type": "loss", "content": 0.03120199404656887, "timestamp": "2025-09-10 02:26:31.567536", "step": 398, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.595059", "step": 398, "epoch": 1 }, { "type": "loss", "content": 0.0885075181722641, "timestamp": "2025-09-10 02:26:31.599871", "step": 399, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.627923", "step": 399, "epoch": 1 }, { "type": "loss", "content": 0.10204315930604935, "timestamp": "2025-09-10 02:26:31.650766", "step": 400, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.678237", "step": 400, "epoch": 1 }, { "type": "loss", "content": 0.012907378375530243, "timestamp": "2025-09-10 02:26:31.679474", "step": 401, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.707368", "step": 401, "epoch": 1 }, { "type": "loss", "content": 0.023699553683400154, "timestamp": "2025-09-10 02:26:31.708817", "step": 402, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.736605", "step": 402, "epoch": 1 }, { "type": "loss", "content": 0.03839164972305298, "timestamp": "2025-09-10 02:26:31.738485", "step": 403, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.766635", "step": 403, "epoch": 1 }, { "type": "loss", "content": 0.03132530674338341, "timestamp": "2025-09-10 02:26:31.789876", "step": 404, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:26:31.818018", "step": 404, "epoch": 1 }, { "type": "loss", "content": 0.032234448939561844, "timestamp": "2025-09-10 02:26:31.819622", "step": 405, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.847580", "step": 405, "epoch": 1 }, { "type": "loss", "content": 0.03030545823276043, "timestamp": "2025-09-10 02:26:31.849443", "step": 406, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.877609", "step": 406, "epoch": 1 }, { "type": "loss", "content": 0.0347440131008625, "timestamp": "2025-09-10 02:26:31.879350", "step": 407, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.907873", "step": 407, "epoch": 1 }, { "type": "loss", "content": 0.039909474551677704, "timestamp": "2025-09-10 02:26:31.930982", "step": 408, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.959061", "step": 408, "epoch": 1 }, { "type": "loss", "content": 0.026259543374180794, "timestamp": "2025-09-10 02:26:31.960569", "step": 409, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:31.988342", "step": 409, "epoch": 1 }, { "type": "loss", "content": 0.033669766038656235, "timestamp": "2025-09-10 02:26:31.990124", "step": 410, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:32.018245", "step": 410, "epoch": 1 }, { "type": "loss", "content": 0.04910421371459961, "timestamp": "2025-09-10 02:26:32.020048", "step": 411, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.048422", "step": 411, "epoch": 1 }, { "type": "loss", "content": 0.015350817702710629, "timestamp": "2025-09-10 02:26:32.075488", "step": 412, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:32.107118", "step": 412, "epoch": 1 }, { "type": "loss", "content": 0.036262813955545425, "timestamp": "2025-09-10 02:26:32.108702", "step": 413, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.137207", "step": 413, "epoch": 1 }, { "type": "loss", "content": 0.0357423797249794, "timestamp": "2025-09-10 02:26:32.138538", "step": 414, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:32.166338", "step": 414, "epoch": 1 }, { "type": "loss", "content": 0.03278939053416252, "timestamp": "2025-09-10 02:26:32.167916", "step": 415, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.196029", "step": 415, "epoch": 1 }, { "type": "loss", "content": 0.029303012415766716, "timestamp": "2025-09-10 02:26:32.218913", "step": 416, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.251495", "step": 416, "epoch": 1 }, { "type": "loss", "content": 0.07521359622478485, "timestamp": "2025-09-10 02:26:32.253062", "step": 417, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.281332", "step": 417, "epoch": 1 }, { "type": "loss", "content": 0.02829148806631565, "timestamp": "2025-09-10 02:26:32.282978", "step": 418, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.311001", "step": 418, "epoch": 1 }, { "type": "loss", "content": 0.020268267020583153, "timestamp": "2025-09-10 02:26:32.312470", "step": 419, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.340566", "step": 419, "epoch": 1 }, { "type": "loss", "content": 0.07261353731155396, "timestamp": "2025-09-10 02:26:32.363735", "step": 420, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:32.392148", "step": 420, "epoch": 1 }, { "type": "loss", "content": 0.016099663451313972, "timestamp": "2025-09-10 02:26:32.397304", "step": 421, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:32.428720", "step": 421, "epoch": 1 }, { "type": "loss", "content": 0.05036252364516258, "timestamp": "2025-09-10 02:26:32.430471", "step": 422, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.459189", "step": 422, "epoch": 1 }, { "type": "loss", "content": 0.04243912547826767, "timestamp": "2025-09-10 02:26:32.461080", "step": 423, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.489353", "step": 423, "epoch": 1 }, { "type": "loss", "content": 0.0400727204978466, "timestamp": "2025-09-10 02:26:32.512441", "step": 424, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.540980", "step": 424, "epoch": 1 }, { "type": "loss", "content": 0.06823001056909561, "timestamp": "2025-09-10 02:26:32.542800", "step": 425, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.571870", "step": 425, "epoch": 1 }, { "type": "loss", "content": 0.04684382677078247, "timestamp": "2025-09-10 02:26:32.573713", "step": 426, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:32.602087", "step": 426, "epoch": 1 }, { "type": "loss", "content": 0.0509195439517498, "timestamp": "2025-09-10 02:26:32.603773", "step": 427, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.632027", "step": 427, "epoch": 1 }, { "type": "loss", "content": 0.021338066086173058, "timestamp": "2025-09-10 02:26:32.655272", "step": 428, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.683575", "step": 428, "epoch": 1 }, { "type": "loss", "content": 0.01432217936962843, "timestamp": "2025-09-10 02:26:32.685303", "step": 429, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.713675", "step": 429, "epoch": 1 }, { "type": "loss", "content": 0.05419403314590454, "timestamp": "2025-09-10 02:26:32.715288", "step": 430, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.743675", "step": 430, "epoch": 1 }, { "type": "loss", "content": 0.008366560563445091, "timestamp": "2025-09-10 02:26:32.745515", "step": 431, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.773818", "step": 431, "epoch": 1 }, { "type": "loss", "content": 0.013925564475357533, "timestamp": "2025-09-10 02:26:32.796975", "step": 432, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.825888", "step": 432, "epoch": 1 }, { "type": "loss", "content": 0.0459279790520668, "timestamp": "2025-09-10 02:26:32.827480", "step": 433, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.856834", "step": 433, "epoch": 1 }, { "type": "loss", "content": 0.055198825895786285, "timestamp": "2025-09-10 02:26:32.859418", "step": 434, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:32.890232", "step": 434, "epoch": 1 }, { "type": "loss", "content": 0.03116236999630928, "timestamp": "2025-09-10 02:26:32.891883", "step": 435, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.920230", "step": 435, "epoch": 1 }, { "type": "loss", "content": 0.03788456693291664, "timestamp": "2025-09-10 02:26:32.943709", "step": 436, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:32.972208", "step": 436, "epoch": 1 }, { "type": "loss", "content": 0.046811241656541824, "timestamp": "2025-09-10 02:26:32.973770", "step": 437, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.001853", "step": 437, "epoch": 1 }, { "type": "loss", "content": 0.04154620319604874, "timestamp": "2025-09-10 02:26:33.003420", "step": 438, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.031448", "step": 438, "epoch": 1 }, { "type": "loss", "content": 0.041397593915462494, "timestamp": "2025-09-10 02:26:33.032992", "step": 439, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.061118", "step": 439, "epoch": 1 }, { "type": "loss", "content": 0.019992543384432793, "timestamp": "2025-09-10 02:26:33.084098", "step": 440, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.115521", "step": 440, "epoch": 1 }, { "type": "loss", "content": 0.049645211547613144, "timestamp": "2025-09-10 02:26:33.116881", "step": 441, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.145136", "step": 441, "epoch": 1 }, { "type": "loss", "content": 0.016393227502703667, "timestamp": "2025-09-10 02:26:33.146775", "step": 442, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.174723", "step": 442, "epoch": 1 }, { "type": "loss", "content": 0.05730712413787842, "timestamp": "2025-09-10 02:26:33.176407", "step": 443, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:33.204681", "step": 443, "epoch": 1 }, { "type": "loss", "content": 0.037708453834056854, "timestamp": "2025-09-10 02:26:33.228018", "step": 444, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.256438", "step": 444, "epoch": 1 }, { "type": "loss", "content": 0.055815424770116806, "timestamp": "2025-09-10 02:26:33.258270", "step": 445, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:33.286694", "step": 445, "epoch": 1 }, { "type": "loss", "content": 0.037254512310028076, "timestamp": "2025-09-10 02:26:33.288254", "step": 446, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.316389", "step": 446, "epoch": 1 }, { "type": "loss", "content": 0.02852635085582733, "timestamp": "2025-09-10 02:26:33.318218", "step": 447, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.346191", "step": 447, "epoch": 1 }, { "type": "loss", "content": 0.08622664958238602, "timestamp": "2025-09-10 02:26:33.369525", "step": 448, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:33.398301", "step": 448, "epoch": 1 }, { "type": "loss", "content": 0.03732261806726456, "timestamp": "2025-09-10 02:26:33.400137", "step": 449, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.428564", "step": 449, "epoch": 1 }, { "type": "loss", "content": 0.033844877034425735, "timestamp": "2025-09-10 02:26:33.430163", "step": 450, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.458256", "step": 450, "epoch": 1 }, { "type": "loss", "content": 0.012706798501312733, "timestamp": "2025-09-10 02:26:33.459888", "step": 451, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.488587", "step": 451, "epoch": 1 }, { "type": "loss", "content": 0.049883805215358734, "timestamp": "2025-09-10 02:26:33.511874", "step": 452, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.540676", "step": 452, "epoch": 1 }, { "type": "loss", "content": 0.042035240679979324, "timestamp": "2025-09-10 02:26:33.542283", "step": 453, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.570559", "step": 453, "epoch": 1 }, { "type": "loss", "content": 0.01677936501801014, "timestamp": "2025-09-10 02:26:33.572086", "step": 454, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:33.600106", "step": 454, "epoch": 1 }, { "type": "loss", "content": 0.030457666143774986, "timestamp": "2025-09-10 02:26:33.601580", "step": 455, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:33.629486", "step": 455, "epoch": 1 }, { "type": "loss", "content": 0.07816998660564423, "timestamp": "2025-09-10 02:26:33.652843", "step": 456, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:26:35.499397", "step": 456, "epoch": 1 }, { "type": "pplx", "content": 2717367.289805951, "timestamp": "2025-09-10 02:26:35.501821", "step": 456, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.529451", "step": 456, "epoch": 1 }, { "type": "loss", "content": 0.016826681792736053, "timestamp": "2025-09-10 02:26:35.531071", "step": 457, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.560166", "step": 457, "epoch": 1 }, { "type": "loss", "content": 0.02761121466755867, "timestamp": "2025-09-10 02:26:35.561734", "step": 458, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:35.589860", "step": 458, "epoch": 1 }, { "type": "loss", "content": 0.03611263260245323, "timestamp": "2025-09-10 02:26:35.591796", "step": 459, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.622110", "step": 459, "epoch": 1 }, { "type": "loss", "content": 0.03756078705191612, "timestamp": "2025-09-10 02:26:35.645806", "step": 460, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.674643", "step": 460, "epoch": 1 }, { "type": "loss", "content": 0.02756829559803009, "timestamp": "2025-09-10 02:26:35.676390", "step": 461, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.704519", "step": 461, "epoch": 1 }, { "type": "loss", "content": 0.02360410988330841, "timestamp": "2025-09-10 02:26:35.707313", "step": 462, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.736060", "step": 462, "epoch": 1 }, { "type": "loss", "content": 0.03001396730542183, "timestamp": "2025-09-10 02:26:35.737576", "step": 463, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.765749", "step": 463, "epoch": 1 }, { "type": "loss", "content": 0.024144025519490242, "timestamp": "2025-09-10 02:26:35.789157", "step": 464, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.818311", "step": 464, "epoch": 1 }, { "type": "loss", "content": 0.08317694813013077, "timestamp": "2025-09-10 02:26:35.820179", "step": 465, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.848294", "step": 465, "epoch": 1 }, { "type": "loss", "content": 0.03497004508972168, "timestamp": "2025-09-10 02:26:35.849997", "step": 466, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.878330", "step": 466, "epoch": 1 }, { "type": "loss", "content": 0.0486321821808815, "timestamp": "2025-09-10 02:26:35.880259", "step": 467, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:35.908964", "step": 467, "epoch": 1 }, { "type": "loss", "content": 0.04043663293123245, "timestamp": "2025-09-10 02:26:35.932102", "step": 468, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.960463", "step": 468, "epoch": 1 }, { "type": "loss", "content": 0.026344984769821167, "timestamp": "2025-09-10 02:26:35.962097", "step": 469, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:35.990298", "step": 469, "epoch": 1 }, { "type": "loss", "content": 0.019227301701903343, "timestamp": "2025-09-10 02:26:35.992162", "step": 470, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.020548", "step": 470, "epoch": 1 }, { "type": "loss", "content": 0.04276519641280174, "timestamp": "2025-09-10 02:26:36.022406", "step": 471, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.050688", "step": 471, "epoch": 1 }, { "type": "loss", "content": 0.042632218450307846, "timestamp": "2025-09-10 02:26:36.074076", "step": 472, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.102274", "step": 472, "epoch": 1 }, { "type": "loss", "content": 0.016928521916270256, "timestamp": "2025-09-10 02:26:36.103506", "step": 473, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.131556", "step": 473, "epoch": 1 }, { "type": "loss", "content": 0.0740702748298645, "timestamp": "2025-09-10 02:26:36.133879", "step": 474, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.161893", "step": 474, "epoch": 1 }, { "type": "loss", "content": 0.022200632840394974, "timestamp": "2025-09-10 02:26:36.163513", "step": 475, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.191631", "step": 475, "epoch": 1 }, { "type": "loss", "content": 0.04462885856628418, "timestamp": "2025-09-10 02:26:36.214713", "step": 476, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.243147", "step": 476, "epoch": 1 }, { "type": "loss", "content": 0.02617846056818962, "timestamp": "2025-09-10 02:26:36.244801", "step": 477, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.272786", "step": 477, "epoch": 1 }, { "type": "loss", "content": 0.043365441262722015, "timestamp": "2025-09-10 02:26:36.274447", "step": 478, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.302677", "step": 478, "epoch": 1 }, { "type": "loss", "content": 0.03110249526798725, "timestamp": "2025-09-10 02:26:36.304054", "step": 479, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:36.331895", "step": 479, "epoch": 1 }, { "type": "loss", "content": 0.05477583780884743, "timestamp": "2025-09-10 02:26:36.354760", "step": 480, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.383314", "step": 480, "epoch": 1 }, { "type": "loss", "content": 0.026761312037706375, "timestamp": "2025-09-10 02:26:36.384919", "step": 481, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.413059", "step": 481, "epoch": 1 }, { "type": "loss", "content": 0.059805192053318024, "timestamp": "2025-09-10 02:26:36.414716", "step": 482, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.442850", "step": 482, "epoch": 1 }, { "type": "loss", "content": 0.020811093971133232, "timestamp": "2025-09-10 02:26:36.444388", "step": 483, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.472480", "step": 483, "epoch": 1 }, { "type": "loss", "content": 0.057123102247714996, "timestamp": "2025-09-10 02:26:36.496184", "step": 484, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.525209", "step": 484, "epoch": 1 }, { "type": "loss", "content": 0.060989223420619965, "timestamp": "2025-09-10 02:26:36.526619", "step": 485, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.554471", "step": 485, "epoch": 1 }, { "type": "loss", "content": 0.06589416414499283, "timestamp": "2025-09-10 02:26:36.556596", "step": 486, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.584714", "step": 486, "epoch": 1 }, { "type": "loss", "content": 0.03613222762942314, "timestamp": "2025-09-10 02:26:36.586385", "step": 487, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.615148", "step": 487, "epoch": 1 }, { "type": "loss", "content": 0.01947481371462345, "timestamp": "2025-09-10 02:26:36.638079", "step": 488, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.666505", "step": 488, "epoch": 1 }, { "type": "loss", "content": 0.05823732540011406, "timestamp": "2025-09-10 02:26:36.668053", "step": 489, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.696430", "step": 489, "epoch": 1 }, { "type": "loss", "content": 0.020005151629447937, "timestamp": "2025-09-10 02:26:36.698267", "step": 490, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.726800", "step": 490, "epoch": 1 }, { "type": "loss", "content": 0.044948723167181015, "timestamp": "2025-09-10 02:26:36.728987", "step": 491, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.757039", "step": 491, "epoch": 1 }, { "type": "loss", "content": 0.05590436980128288, "timestamp": "2025-09-10 02:26:36.780315", "step": 492, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.808308", "step": 492, "epoch": 1 }, { "type": "loss", "content": 0.07437480986118317, "timestamp": "2025-09-10 02:26:36.809808", "step": 493, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.837671", "step": 493, "epoch": 1 }, { "type": "loss", "content": 0.057792190462350845, "timestamp": "2025-09-10 02:26:36.839193", "step": 494, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.866904", "step": 494, "epoch": 1 }, { "type": "loss", "content": 0.05404549092054367, "timestamp": "2025-09-10 02:26:36.868490", "step": 495, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.896378", "step": 495, "epoch": 1 }, { "type": "loss", "content": 0.02384149469435215, "timestamp": "2025-09-10 02:26:36.919406", "step": 496, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:36.947521", "step": 496, "epoch": 1 }, { "type": "loss", "content": 0.031597964465618134, "timestamp": "2025-09-10 02:26:36.949120", "step": 497, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:36.977087", "step": 497, "epoch": 1 }, { "type": "loss", "content": 0.038688305765390396, "timestamp": "2025-09-10 02:26:36.978762", "step": 498, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:37.007028", "step": 498, "epoch": 1 }, { "type": "loss", "content": 0.030446495860815048, "timestamp": "2025-09-10 02:26:37.008691", "step": 499, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:37.036385", "step": 499, "epoch": 1 }, { "type": "loss", "content": 0.021534087136387825, "timestamp": "2025-09-10 02:26:37.059563", "step": 500, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 500", "timestamp": "2025-09-10 02:26:41.537321", "step": 500, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.568606", "step": 500, "epoch": 1 }, { "type": "loss", "content": 0.01570574752986431, "timestamp": "2025-09-10 02:26:41.570127", "step": 501, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.598659", "step": 501, "epoch": 1 }, { "type": "loss", "content": 0.02040550298988819, "timestamp": "2025-09-10 02:26:41.600305", "step": 502, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.628787", "step": 502, "epoch": 1 }, { "type": "loss", "content": 0.0573415644466877, "timestamp": "2025-09-10 02:26:41.630595", "step": 503, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:41.659119", "step": 503, "epoch": 1 }, { "type": "loss", "content": 0.030473779886960983, "timestamp": "2025-09-10 02:26:41.682585", "step": 504, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:41.711042", "step": 504, "epoch": 1 }, { "type": "loss", "content": 0.03402290865778923, "timestamp": "2025-09-10 02:26:41.712577", "step": 505, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.740446", "step": 505, "epoch": 1 }, { "type": "loss", "content": 0.03856948763132095, "timestamp": "2025-09-10 02:26:41.742043", "step": 506, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.770446", "step": 506, "epoch": 1 }, { "type": "loss", "content": 0.03185379132628441, "timestamp": "2025-09-10 02:26:41.772222", "step": 507, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.800894", "step": 507, "epoch": 1 }, { "type": "loss", "content": 0.05316280573606491, "timestamp": "2025-09-10 02:26:41.823772", "step": 508, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.852227", "step": 508, "epoch": 1 }, { "type": "loss", "content": 0.024660510942339897, "timestamp": "2025-09-10 02:26:41.853425", "step": 509, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:41.881382", "step": 509, "epoch": 1 }, { "type": "loss", "content": 0.04275422543287277, "timestamp": "2025-09-10 02:26:41.883014", "step": 510, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:41.911031", "step": 510, "epoch": 1 }, { "type": "loss", "content": 0.04156554862856865, "timestamp": "2025-09-10 02:26:41.912686", "step": 511, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:41.940694", "step": 511, "epoch": 1 }, { "type": "loss", "content": 0.03838549181818962, "timestamp": "2025-09-10 02:26:41.963968", "step": 512, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:41.992188", "step": 512, "epoch": 1 }, { "type": "loss", "content": 0.04556984454393387, "timestamp": "2025-09-10 02:26:41.993830", "step": 513, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:42.022203", "step": 513, "epoch": 1 }, { "type": "loss", "content": 0.04985576868057251, "timestamp": "2025-09-10 02:26:42.023714", "step": 514, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.051442", "step": 514, "epoch": 1 }, { "type": "loss", "content": 0.0277482271194458, "timestamp": "2025-09-10 02:26:42.053184", "step": 515, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.081291", "step": 515, "epoch": 1 }, { "type": "loss", "content": 0.048427700996398926, "timestamp": "2025-09-10 02:26:42.104531", "step": 516, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.132991", "step": 516, "epoch": 1 }, { "type": "loss", "content": 0.007362372241914272, "timestamp": "2025-09-10 02:26:42.134766", "step": 517, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.163506", "step": 517, "epoch": 1 }, { "type": "loss", "content": 0.0382639579474926, "timestamp": "2025-09-10 02:26:42.165013", "step": 518, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.193298", "step": 518, "epoch": 1 }, { "type": "loss", "content": 0.03767085075378418, "timestamp": "2025-09-10 02:26:42.195534", "step": 519, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.224009", "step": 519, "epoch": 1 }, { "type": "loss", "content": 0.03013140894472599, "timestamp": "2025-09-10 02:26:42.247118", "step": 520, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:42.275353", "step": 520, "epoch": 1 }, { "type": "loss", "content": 0.016971739009022713, "timestamp": "2025-09-10 02:26:42.276865", "step": 521, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.304896", "step": 521, "epoch": 1 }, { "type": "loss", "content": 0.02043677680194378, "timestamp": "2025-09-10 02:26:42.306589", "step": 522, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.335270", "step": 522, "epoch": 1 }, { "type": "loss", "content": 0.046038419008255005, "timestamp": "2025-09-10 02:26:42.336806", "step": 523, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.365067", "step": 523, "epoch": 1 }, { "type": "loss", "content": 0.029047995805740356, "timestamp": "2025-09-10 02:26:42.388138", "step": 524, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.416710", "step": 524, "epoch": 1 }, { "type": "loss", "content": 0.012068702839314938, "timestamp": "2025-09-10 02:26:42.418482", "step": 525, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.447067", "step": 525, "epoch": 1 }, { "type": "loss", "content": 0.074915811419487, "timestamp": "2025-09-10 02:26:42.448479", "step": 526, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.476557", "step": 526, "epoch": 1 }, { "type": "loss", "content": 0.025899339467287064, "timestamp": "2025-09-10 02:26:42.478115", "step": 527, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.506268", "step": 527, "epoch": 1 }, { "type": "loss", "content": 0.027209777384996414, "timestamp": "2025-09-10 02:26:42.529353", "step": 528, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.558126", "step": 528, "epoch": 1 }, { "type": "loss", "content": 0.0507054440677166, "timestamp": "2025-09-10 02:26:42.559543", "step": 529, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:42.587824", "step": 529, "epoch": 1 }, { "type": "loss", "content": 0.06465242058038712, "timestamp": "2025-09-10 02:26:42.589472", "step": 530, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:42.618335", "step": 530, "epoch": 1 }, { "type": "loss", "content": 0.013187861070036888, "timestamp": "2025-09-10 02:26:42.620011", "step": 531, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.648790", "step": 531, "epoch": 1 }, { "type": "loss", "content": 0.039449822157621384, "timestamp": "2025-09-10 02:26:42.671710", "step": 532, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:42.700993", "step": 532, "epoch": 1 }, { "type": "loss", "content": 0.0234070997685194, "timestamp": "2025-09-10 02:26:42.702553", "step": 533, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.731036", "step": 533, "epoch": 1 }, { "type": "loss", "content": 0.05278443172574043, "timestamp": "2025-09-10 02:26:42.732818", "step": 534, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.761049", "step": 534, "epoch": 1 }, { "type": "loss", "content": 0.03800437971949577, "timestamp": "2025-09-10 02:26:42.762490", "step": 535, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.790796", "step": 535, "epoch": 1 }, { "type": "loss", "content": 0.03744731470942497, "timestamp": "2025-09-10 02:26:42.814055", "step": 536, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.842521", "step": 536, "epoch": 1 }, { "type": "loss", "content": 0.020335109904408455, "timestamp": "2025-09-10 02:26:42.844324", "step": 537, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.872482", "step": 537, "epoch": 1 }, { "type": "loss", "content": 0.026498381048440933, "timestamp": "2025-09-10 02:26:42.873948", "step": 538, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.902204", "step": 538, "epoch": 1 }, { "type": "loss", "content": 0.021354462951421738, "timestamp": "2025-09-10 02:26:42.904079", "step": 539, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.932522", "step": 539, "epoch": 1 }, { "type": "loss", "content": 0.023156922310590744, "timestamp": "2025-09-10 02:26:42.955560", "step": 540, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:42.984199", "step": 540, "epoch": 1 }, { "type": "loss", "content": 0.03991435095667839, "timestamp": "2025-09-10 02:26:42.985766", "step": 541, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:43.014073", "step": 541, "epoch": 1 }, { "type": "loss", "content": 0.08322066068649292, "timestamp": "2025-09-10 02:26:43.015751", "step": 542, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.044092", "step": 542, "epoch": 1 }, { "type": "loss", "content": 0.045775383710861206, "timestamp": "2025-09-10 02:26:43.045490", "step": 543, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.073719", "step": 543, "epoch": 1 }, { "type": "loss", "content": 0.04141625016927719, "timestamp": "2025-09-10 02:26:43.096638", "step": 544, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.125323", "step": 544, "epoch": 1 }, { "type": "loss", "content": 0.053145766258239746, "timestamp": "2025-09-10 02:26:43.126932", "step": 545, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.155397", "step": 545, "epoch": 1 }, { "type": "loss", "content": 0.031390417367219925, "timestamp": "2025-09-10 02:26:43.156779", "step": 546, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.185162", "step": 546, "epoch": 1 }, { "type": "loss", "content": 0.028599724173545837, "timestamp": "2025-09-10 02:26:43.186943", "step": 547, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.215096", "step": 547, "epoch": 1 }, { "type": "loss", "content": 0.01685398444533348, "timestamp": "2025-09-10 02:26:43.238252", "step": 548, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:43.271233", "step": 548, "epoch": 1 }, { "type": "loss", "content": 0.01568339765071869, "timestamp": "2025-09-10 02:26:43.272866", "step": 549, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.301708", "step": 549, "epoch": 1 }, { "type": "loss", "content": 0.024174261838197708, "timestamp": "2025-09-10 02:26:43.303375", "step": 550, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.332073", "step": 550, "epoch": 1 }, { "type": "loss", "content": 0.012761001475155354, "timestamp": "2025-09-10 02:26:43.333506", "step": 551, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.362030", "step": 551, "epoch": 1 }, { "type": "loss", "content": 0.029998183250427246, "timestamp": "2025-09-10 02:26:43.385488", "step": 552, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.414645", "step": 552, "epoch": 1 }, { "type": "loss", "content": 0.022816693410277367, "timestamp": "2025-09-10 02:26:43.416679", "step": 553, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.445258", "step": 553, "epoch": 1 }, { "type": "loss", "content": 0.05389130488038063, "timestamp": "2025-09-10 02:26:43.447204", "step": 554, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.475768", "step": 554, "epoch": 1 }, { "type": "loss", "content": 0.02855268493294716, "timestamp": "2025-09-10 02:26:43.477464", "step": 555, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.505718", "step": 555, "epoch": 1 }, { "type": "loss", "content": 0.02061382308602333, "timestamp": "2025-09-10 02:26:43.529083", "step": 556, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.557476", "step": 556, "epoch": 1 }, { "type": "loss", "content": 0.08206148445606232, "timestamp": "2025-09-10 02:26:43.559541", "step": 557, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.587944", "step": 557, "epoch": 1 }, { "type": "loss", "content": 0.02579513192176819, "timestamp": "2025-09-10 02:26:43.589636", "step": 558, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:43.618052", "step": 558, "epoch": 1 }, { "type": "loss", "content": 0.009594475850462914, "timestamp": "2025-09-10 02:26:43.619915", "step": 559, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:43.648792", "step": 559, "epoch": 1 }, { "type": "loss", "content": 0.04478087276220322, "timestamp": "2025-09-10 02:26:43.671998", "step": 560, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.700559", "step": 560, "epoch": 1 }, { "type": "loss", "content": 0.028195369988679886, "timestamp": "2025-09-10 02:26:43.702105", "step": 561, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.730458", "step": 561, "epoch": 1 }, { "type": "loss", "content": 0.04168400168418884, "timestamp": "2025-09-10 02:26:43.732410", "step": 562, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:43.761456", "step": 562, "epoch": 1 }, { "type": "loss", "content": 0.06364897638559341, "timestamp": "2025-09-10 02:26:43.763142", "step": 563, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.792127", "step": 563, "epoch": 1 }, { "type": "loss", "content": 0.03534334525465965, "timestamp": "2025-09-10 02:26:43.815196", "step": 564, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:43.843839", "step": 564, "epoch": 1 }, { "type": "loss", "content": 0.03911682218313217, "timestamp": "2025-09-10 02:26:43.845380", "step": 565, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:43.874083", "step": 565, "epoch": 1 }, { "type": "loss", "content": 0.05347789078950882, "timestamp": "2025-09-10 02:26:43.876052", "step": 566, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.904725", "step": 566, "epoch": 1 }, { "type": "loss", "content": 0.02387906052172184, "timestamp": "2025-09-10 02:26:43.906387", "step": 567, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:43.935015", "step": 567, "epoch": 1 }, { "type": "loss", "content": 0.03277410939335823, "timestamp": "2025-09-10 02:26:43.958345", "step": 568, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:43.986785", "step": 568, "epoch": 1 }, { "type": "loss", "content": 0.014015858992934227, "timestamp": "2025-09-10 02:26:43.988868", "step": 569, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.017811", "step": 569, "epoch": 1 }, { "type": "loss", "content": 0.0363222174346447, "timestamp": "2025-09-10 02:26:44.019760", "step": 570, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.049975", "step": 570, "epoch": 1 }, { "type": "loss", "content": 0.03212037682533264, "timestamp": "2025-09-10 02:26:44.051786", "step": 571, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.080092", "step": 571, "epoch": 1 }, { "type": "loss", "content": 0.036108482629060745, "timestamp": "2025-09-10 02:26:44.113427", "step": 572, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.147330", "step": 572, "epoch": 1 }, { "type": "loss", "content": 0.05427970737218857, "timestamp": "2025-09-10 02:26:44.148934", "step": 573, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.185849", "step": 573, "epoch": 1 }, { "type": "loss", "content": 0.0445655882358551, "timestamp": "2025-09-10 02:26:44.187633", "step": 574, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.216105", "step": 574, "epoch": 1 }, { "type": "loss", "content": 0.013648323714733124, "timestamp": "2025-09-10 02:26:44.217890", "step": 575, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.246656", "step": 575, "epoch": 1 }, { "type": "loss", "content": 0.048125237226486206, "timestamp": "2025-09-10 02:26:44.272245", "step": 576, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.301237", "step": 576, "epoch": 1 }, { "type": "loss", "content": 0.03517252579331398, "timestamp": "2025-09-10 02:26:44.303187", "step": 577, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.331943", "step": 577, "epoch": 1 }, { "type": "loss", "content": 0.026025895029306412, "timestamp": "2025-09-10 02:26:44.333916", "step": 578, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.367308", "step": 578, "epoch": 1 }, { "type": "loss", "content": 0.010249711573123932, "timestamp": "2025-09-10 02:26:44.368942", "step": 579, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.397354", "step": 579, "epoch": 1 }, { "type": "loss", "content": 0.037203043699264526, "timestamp": "2025-09-10 02:26:44.425016", "step": 580, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:44.453711", "step": 580, "epoch": 1 }, { "type": "loss", "content": 0.027322134003043175, "timestamp": "2025-09-10 02:26:44.455532", "step": 581, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.486534", "step": 581, "epoch": 1 }, { "type": "loss", "content": 0.05180314928293228, "timestamp": "2025-09-10 02:26:44.488365", "step": 582, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.516992", "step": 582, "epoch": 1 }, { "type": "loss", "content": 0.04153513163328171, "timestamp": "2025-09-10 02:26:44.518892", "step": 583, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.547695", "step": 583, "epoch": 1 }, { "type": "loss", "content": 0.01963025890290737, "timestamp": "2025-09-10 02:26:44.575161", "step": 584, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:44.607489", "step": 584, "epoch": 1 }, { "type": "loss", "content": 0.028999345377087593, "timestamp": "2025-09-10 02:26:44.608907", "step": 585, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.637714", "step": 585, "epoch": 1 }, { "type": "loss", "content": 0.01062434446066618, "timestamp": "2025-09-10 02:26:44.639332", "step": 586, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.668319", "step": 586, "epoch": 1 }, { "type": "loss", "content": 0.047068577259778976, "timestamp": "2025-09-10 02:26:44.670027", "step": 587, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.698001", "step": 587, "epoch": 1 }, { "type": "loss", "content": 0.018893277272582054, "timestamp": "2025-09-10 02:26:44.721191", "step": 588, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:26:44.757206", "step": 588, "epoch": 1 }, { "type": "loss", "content": 0.03721915930509567, "timestamp": "2025-09-10 02:26:44.758884", "step": 589, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.789131", "step": 589, "epoch": 1 }, { "type": "loss", "content": 0.02848803997039795, "timestamp": "2025-09-10 02:26:44.790837", "step": 590, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.819290", "step": 590, "epoch": 1 }, { "type": "loss", "content": 0.019638491794466972, "timestamp": "2025-09-10 02:26:44.821212", "step": 591, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.850107", "step": 591, "epoch": 1 }, { "type": "loss", "content": 0.03575807437300682, "timestamp": "2025-09-10 02:26:44.874463", "step": 592, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.903444", "step": 592, "epoch": 1 }, { "type": "loss", "content": 0.027394594624638557, "timestamp": "2025-09-10 02:26:44.905074", "step": 593, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.933905", "step": 593, "epoch": 1 }, { "type": "loss", "content": 0.02229316346347332, "timestamp": "2025-09-10 02:26:44.935774", "step": 594, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:44.964452", "step": 594, "epoch": 1 }, { "type": "loss", "content": 0.02307887002825737, "timestamp": "2025-09-10 02:26:44.966155", "step": 595, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:44.994886", "step": 595, "epoch": 1 }, { "type": "loss", "content": 0.01366735901683569, "timestamp": "2025-09-10 02:26:45.018222", "step": 596, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.047055", "step": 596, "epoch": 1 }, { "type": "loss", "content": 0.046775348484516144, "timestamp": "2025-09-10 02:26:45.048939", "step": 597, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:45.077744", "step": 597, "epoch": 1 }, { "type": "loss", "content": 0.015637096017599106, "timestamp": "2025-09-10 02:26:45.079685", "step": 598, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.108499", "step": 598, "epoch": 1 }, { "type": "loss", "content": 0.015831107273697853, "timestamp": "2025-09-10 02:26:45.110363", "step": 599, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.139372", "step": 599, "epoch": 1 }, { "type": "loss", "content": 0.04828057810664177, "timestamp": "2025-09-10 02:26:45.162873", "step": 600, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:45.191474", "step": 600, "epoch": 1 }, { "type": "loss", "content": 0.039840757846832275, "timestamp": "2025-09-10 02:26:45.193168", "step": 601, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:45.221727", "step": 601, "epoch": 1 }, { "type": "loss", "content": 0.0489020049571991, "timestamp": "2025-09-10 02:26:45.223404", "step": 602, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.252583", "step": 602, "epoch": 1 }, { "type": "loss", "content": 0.008196599781513214, "timestamp": "2025-09-10 02:26:45.254015", "step": 603, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.282326", "step": 603, "epoch": 1 }, { "type": "loss", "content": 0.033972881734371185, "timestamp": "2025-09-10 02:26:45.305506", "step": 604, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.335052", "step": 604, "epoch": 1 }, { "type": "loss", "content": 0.03189618140459061, "timestamp": "2025-09-10 02:26:45.336930", "step": 605, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:45.365521", "step": 605, "epoch": 1 }, { "type": "loss", "content": 0.03479809686541557, "timestamp": "2025-09-10 02:26:45.367988", "step": 606, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.396696", "step": 606, "epoch": 1 }, { "type": "loss", "content": 0.008106947876513004, "timestamp": "2025-09-10 02:26:45.398696", "step": 607, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:45.427353", "step": 607, "epoch": 1 }, { "type": "loss", "content": 0.04672331362962723, "timestamp": "2025-09-10 02:26:45.450618", "step": 608, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:26:47.291541", "step": 608, "epoch": 1 }, { "type": "pplx", "content": 2200986.452202289, "timestamp": "2025-09-10 02:26:47.293495", "step": 608, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.321364", "step": 608, "epoch": 1 }, { "type": "loss", "content": 0.07109908759593964, "timestamp": "2025-09-10 02:26:47.323033", "step": 609, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:47.351547", "step": 609, "epoch": 1 }, { "type": "loss", "content": 0.0423298217356205, "timestamp": "2025-09-10 02:26:47.352995", "step": 610, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.381791", "step": 610, "epoch": 1 }, { "type": "loss", "content": 0.015477584674954414, "timestamp": "2025-09-10 02:26:47.383254", "step": 611, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.412143", "step": 611, "epoch": 1 }, { "type": "loss", "content": 0.010698787868022919, "timestamp": "2025-09-10 02:26:47.435218", "step": 612, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.463670", "step": 612, "epoch": 1 }, { "type": "loss", "content": 0.03782622888684273, "timestamp": "2025-09-10 02:26:47.465354", "step": 613, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.494574", "step": 613, "epoch": 1 }, { "type": "loss", "content": 0.047737814486026764, "timestamp": "2025-09-10 02:26:47.496084", "step": 614, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.524379", "step": 614, "epoch": 1 }, { "type": "loss", "content": 0.009725173935294151, "timestamp": "2025-09-10 02:26:47.526432", "step": 615, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.555581", "step": 615, "epoch": 1 }, { "type": "loss", "content": 0.006624232046306133, "timestamp": "2025-09-10 02:26:47.579249", "step": 616, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.607571", "step": 616, "epoch": 1 }, { "type": "loss", "content": 0.046601273119449615, "timestamp": "2025-09-10 02:26:47.609253", "step": 617, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.638308", "step": 617, "epoch": 1 }, { "type": "loss", "content": 0.031758103519678116, "timestamp": "2025-09-10 02:26:47.640190", "step": 618, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.668808", "step": 618, "epoch": 1 }, { "type": "loss", "content": 0.0243473369628191, "timestamp": "2025-09-10 02:26:47.670406", "step": 619, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.698825", "step": 619, "epoch": 1 }, { "type": "loss", "content": 0.018934383988380432, "timestamp": "2025-09-10 02:26:47.722157", "step": 620, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:47.751167", "step": 620, "epoch": 1 }, { "type": "loss", "content": 0.0067651644349098206, "timestamp": "2025-09-10 02:26:47.752961", "step": 621, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:47.781712", "step": 621, "epoch": 1 }, { "type": "loss", "content": 0.02667141892015934, "timestamp": "2025-09-10 02:26:47.783431", "step": 622, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:47.811983", "step": 622, "epoch": 1 }, { "type": "loss", "content": 0.04350658506155014, "timestamp": "2025-09-10 02:26:47.813851", "step": 623, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.842399", "step": 623, "epoch": 1 }, { "type": "loss", "content": 0.0501113124191761, "timestamp": "2025-09-10 02:26:47.865832", "step": 624, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.894643", "step": 624, "epoch": 1 }, { "type": "loss", "content": 0.061473067849874496, "timestamp": "2025-09-10 02:26:47.896345", "step": 625, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.924558", "step": 625, "epoch": 1 }, { "type": "loss", "content": 0.03871607780456543, "timestamp": "2025-09-10 02:26:47.926386", "step": 626, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.954495", "step": 626, "epoch": 1 }, { "type": "loss", "content": 0.03448708355426788, "timestamp": "2025-09-10 02:26:47.955903", "step": 627, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:47.983580", "step": 627, "epoch": 1 }, { "type": "loss", "content": 0.0213655773550272, "timestamp": "2025-09-10 02:26:48.006853", "step": 628, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.036037", "step": 628, "epoch": 1 }, { "type": "loss", "content": 0.022643353790044785, "timestamp": "2025-09-10 02:26:48.037597", "step": 629, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.065825", "step": 629, "epoch": 1 }, { "type": "loss", "content": 0.04520813003182411, "timestamp": "2025-09-10 02:26:48.067593", "step": 630, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.096649", "step": 630, "epoch": 1 }, { "type": "loss", "content": 0.04208702594041824, "timestamp": "2025-09-10 02:26:48.098437", "step": 631, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.127257", "step": 631, "epoch": 1 }, { "type": "loss", "content": 0.045431990176439285, "timestamp": "2025-09-10 02:26:48.150642", "step": 632, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.179534", "step": 632, "epoch": 1 }, { "type": "loss", "content": 0.01084569375962019, "timestamp": "2025-09-10 02:26:48.181145", "step": 633, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.209489", "step": 633, "epoch": 1 }, { "type": "loss", "content": 0.049830012023448944, "timestamp": "2025-09-10 02:26:48.211225", "step": 634, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.243221", "step": 634, "epoch": 1 }, { "type": "loss", "content": 0.03610464185476303, "timestamp": "2025-09-10 02:26:48.245009", "step": 635, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.273148", "step": 635, "epoch": 1 }, { "type": "loss", "content": 0.028134096413850784, "timestamp": "2025-09-10 02:26:48.296275", "step": 636, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.324879", "step": 636, "epoch": 1 }, { "type": "loss", "content": 0.00924620684236288, "timestamp": "2025-09-10 02:26:48.326769", "step": 637, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.355328", "step": 637, "epoch": 1 }, { "type": "loss", "content": 0.03791581466794014, "timestamp": "2025-09-10 02:26:48.357050", "step": 638, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:48.385959", "step": 638, "epoch": 1 }, { "type": "loss", "content": 0.02533513866364956, "timestamp": "2025-09-10 02:26:48.387588", "step": 639, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.416118", "step": 639, "epoch": 1 }, { "type": "loss", "content": 0.037173282355070114, "timestamp": "2025-09-10 02:26:48.439274", "step": 640, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.468541", "step": 640, "epoch": 1 }, { "type": "loss", "content": 0.026109185069799423, "timestamp": "2025-09-10 02:26:48.470084", "step": 641, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.498801", "step": 641, "epoch": 1 }, { "type": "loss", "content": 0.019872980192303658, "timestamp": "2025-09-10 02:26:48.500314", "step": 642, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.528802", "step": 642, "epoch": 1 }, { "type": "loss", "content": 0.04102848097681999, "timestamp": "2025-09-10 02:26:48.530524", "step": 643, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.559413", "step": 643, "epoch": 1 }, { "type": "loss", "content": 0.057591069489717484, "timestamp": "2025-09-10 02:26:48.582748", "step": 644, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.611840", "step": 644, "epoch": 1 }, { "type": "loss", "content": 0.06546728312969208, "timestamp": "2025-09-10 02:26:48.613714", "step": 645, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:48.642318", "step": 645, "epoch": 1 }, { "type": "loss", "content": 0.02737610973417759, "timestamp": "2025-09-10 02:26:48.644304", "step": 646, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.673191", "step": 646, "epoch": 1 }, { "type": "loss", "content": 0.026363076642155647, "timestamp": "2025-09-10 02:26:48.674817", "step": 647, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.703577", "step": 647, "epoch": 1 }, { "type": "loss", "content": 0.036277834326028824, "timestamp": "2025-09-10 02:26:48.726689", "step": 648, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:48.755769", "step": 648, "epoch": 1 }, { "type": "loss", "content": 0.03815680742263794, "timestamp": "2025-09-10 02:26:48.757495", "step": 649, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.785888", "step": 649, "epoch": 1 }, { "type": "loss", "content": 0.05288299545645714, "timestamp": "2025-09-10 02:26:48.787603", "step": 650, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.816012", "step": 650, "epoch": 1 }, { "type": "loss", "content": 0.007451296783983707, "timestamp": "2025-09-10 02:26:48.817826", "step": 651, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:48.846628", "step": 651, "epoch": 1 }, { "type": "loss", "content": 0.018843041732907295, "timestamp": "2025-09-10 02:26:48.869876", "step": 652, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.898911", "step": 652, "epoch": 1 }, { "type": "loss", "content": 0.013613715767860413, "timestamp": "2025-09-10 02:26:48.900363", "step": 653, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.928360", "step": 653, "epoch": 1 }, { "type": "loss", "content": 0.06748726963996887, "timestamp": "2025-09-10 02:26:48.929862", "step": 654, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.957990", "step": 654, "epoch": 1 }, { "type": "loss", "content": 0.043075766414403915, "timestamp": "2025-09-10 02:26:48.959391", "step": 655, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:48.987570", "step": 655, "epoch": 1 }, { "type": "loss", "content": 0.042788174003362656, "timestamp": "2025-09-10 02:26:49.010596", "step": 656, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.039316", "step": 656, "epoch": 1 }, { "type": "loss", "content": 0.055226586759090424, "timestamp": "2025-09-10 02:26:49.040912", "step": 657, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:49.068828", "step": 657, "epoch": 1 }, { "type": "loss", "content": 0.053308092057704926, "timestamp": "2025-09-10 02:26:49.070686", "step": 658, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.099403", "step": 658, "epoch": 1 }, { "type": "loss", "content": 0.028021065518260002, "timestamp": "2025-09-10 02:26:49.101193", "step": 659, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.129822", "step": 659, "epoch": 1 }, { "type": "loss", "content": 0.032105498015880585, "timestamp": "2025-09-10 02:26:49.153282", "step": 660, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.182065", "step": 660, "epoch": 1 }, { "type": "loss", "content": 0.04760170355439186, "timestamp": "2025-09-10 02:26:49.183890", "step": 661, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.212643", "step": 661, "epoch": 1 }, { "type": "loss", "content": 0.02774801477789879, "timestamp": "2025-09-10 02:26:49.214414", "step": 662, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.242896", "step": 662, "epoch": 1 }, { "type": "loss", "content": 0.05581825226545334, "timestamp": "2025-09-10 02:26:49.244781", "step": 663, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.272715", "step": 663, "epoch": 1 }, { "type": "loss", "content": 0.009996329434216022, "timestamp": "2025-09-10 02:26:49.295831", "step": 664, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.324296", "step": 664, "epoch": 1 }, { "type": "loss", "content": 0.015302867628633976, "timestamp": "2025-09-10 02:26:49.325907", "step": 665, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:49.353981", "step": 665, "epoch": 1 }, { "type": "loss", "content": 0.014631493017077446, "timestamp": "2025-09-10 02:26:49.355922", "step": 666, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.384685", "step": 666, "epoch": 1 }, { "type": "loss", "content": 0.03862696886062622, "timestamp": "2025-09-10 02:26:49.386372", "step": 667, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:49.415127", "step": 667, "epoch": 1 }, { "type": "loss", "content": 0.026202142238616943, "timestamp": "2025-09-10 02:26:49.438485", "step": 668, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.467023", "step": 668, "epoch": 1 }, { "type": "loss", "content": 0.03486243262887001, "timestamp": "2025-09-10 02:26:49.468711", "step": 669, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.497397", "step": 669, "epoch": 1 }, { "type": "loss", "content": 0.023656947538256645, "timestamp": "2025-09-10 02:26:49.498743", "step": 670, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.527268", "step": 670, "epoch": 1 }, { "type": "loss", "content": 0.031210143119096756, "timestamp": "2025-09-10 02:26:49.528829", "step": 671, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.557469", "step": 671, "epoch": 1 }, { "type": "loss", "content": 0.007307054009288549, "timestamp": "2025-09-10 02:26:49.580396", "step": 672, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.609705", "step": 672, "epoch": 1 }, { "type": "loss", "content": 0.025393787771463394, "timestamp": "2025-09-10 02:26:49.611422", "step": 673, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.639948", "step": 673, "epoch": 1 }, { "type": "loss", "content": 0.04881792888045311, "timestamp": "2025-09-10 02:26:49.641661", "step": 674, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.669590", "step": 674, "epoch": 1 }, { "type": "loss", "content": 0.006452545057982206, "timestamp": "2025-09-10 02:26:49.671096", "step": 675, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:49.699870", "step": 675, "epoch": 1 }, { "type": "loss", "content": 0.0486336275935173, "timestamp": "2025-09-10 02:26:49.723222", "step": 676, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.751719", "step": 676, "epoch": 1 }, { "type": "loss", "content": 0.06550680845975876, "timestamp": "2025-09-10 02:26:49.753426", "step": 677, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.782394", "step": 677, "epoch": 1 }, { "type": "loss", "content": 0.058530163019895554, "timestamp": "2025-09-10 02:26:49.784240", "step": 678, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.813042", "step": 678, "epoch": 1 }, { "type": "loss", "content": 0.0352046824991703, "timestamp": "2025-09-10 02:26:49.814771", "step": 679, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.843133", "step": 679, "epoch": 1 }, { "type": "loss", "content": 0.020520886406302452, "timestamp": "2025-09-10 02:26:49.866393", "step": 680, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.894804", "step": 680, "epoch": 1 }, { "type": "loss", "content": 0.011987773701548576, "timestamp": "2025-09-10 02:26:49.896333", "step": 681, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.924330", "step": 681, "epoch": 1 }, { "type": "loss", "content": 0.023767616599798203, "timestamp": "2025-09-10 02:26:49.926027", "step": 682, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.955016", "step": 682, "epoch": 1 }, { "type": "loss", "content": 0.021541643887758255, "timestamp": "2025-09-10 02:26:49.956569", "step": 683, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:49.984250", "step": 683, "epoch": 1 }, { "type": "loss", "content": 0.015927335247397423, "timestamp": "2025-09-10 02:26:50.007246", "step": 684, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.035920", "step": 684, "epoch": 1 }, { "type": "loss", "content": 0.010418176651000977, "timestamp": "2025-09-10 02:26:50.037275", "step": 685, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.065636", "step": 685, "epoch": 1 }, { "type": "loss", "content": 0.0358634777367115, "timestamp": "2025-09-10 02:26:50.067184", "step": 686, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.095420", "step": 686, "epoch": 1 }, { "type": "loss", "content": 0.03563470393419266, "timestamp": "2025-09-10 02:26:50.096846", "step": 687, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.125020", "step": 687, "epoch": 1 }, { "type": "loss", "content": 0.024202287197113037, "timestamp": "2025-09-10 02:26:50.148289", "step": 688, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.176645", "step": 688, "epoch": 1 }, { "type": "loss", "content": 0.0377209298312664, "timestamp": "2025-09-10 02:26:50.178424", "step": 689, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.206812", "step": 689, "epoch": 1 }, { "type": "loss", "content": 0.020676741376519203, "timestamp": "2025-09-10 02:26:50.208292", "step": 690, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.236682", "step": 690, "epoch": 1 }, { "type": "loss", "content": 0.06705940514802933, "timestamp": "2025-09-10 02:26:50.238295", "step": 691, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.266571", "step": 691, "epoch": 1 }, { "type": "loss", "content": 0.0945725068449974, "timestamp": "2025-09-10 02:26:50.289528", "step": 692, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.317965", "step": 692, "epoch": 1 }, { "type": "loss", "content": 0.033295098692178726, "timestamp": "2025-09-10 02:26:50.319377", "step": 693, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:50.347257", "step": 693, "epoch": 1 }, { "type": "loss", "content": 0.037053436040878296, "timestamp": "2025-09-10 02:26:50.348812", "step": 694, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.376989", "step": 694, "epoch": 1 }, { "type": "loss", "content": 0.021943015977740288, "timestamp": "2025-09-10 02:26:50.378569", "step": 695, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.406669", "step": 695, "epoch": 1 }, { "type": "loss", "content": 0.026456695050001144, "timestamp": "2025-09-10 02:26:50.429853", "step": 696, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.457872", "step": 696, "epoch": 1 }, { "type": "loss", "content": 0.03228648751974106, "timestamp": "2025-09-10 02:26:50.459470", "step": 697, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.487012", "step": 697, "epoch": 1 }, { "type": "loss", "content": 0.023058131337165833, "timestamp": "2025-09-10 02:26:50.488465", "step": 698, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.516163", "step": 698, "epoch": 1 }, { "type": "loss", "content": 0.030382486060261726, "timestamp": "2025-09-10 02:26:50.517577", "step": 699, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.545948", "step": 699, "epoch": 1 }, { "type": "loss", "content": 0.02017112448811531, "timestamp": "2025-09-10 02:26:50.569146", "step": 700, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.597610", "step": 700, "epoch": 1 }, { "type": "loss", "content": 0.016261307522654533, "timestamp": "2025-09-10 02:26:50.599127", "step": 701, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:50.628167", "step": 701, "epoch": 1 }, { "type": "loss", "content": 0.04979781433939934, "timestamp": "2025-09-10 02:26:50.629878", "step": 702, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:50.658549", "step": 702, "epoch": 1 }, { "type": "loss", "content": 0.03741488605737686, "timestamp": "2025-09-10 02:26:50.660198", "step": 703, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:50.688713", "step": 703, "epoch": 1 }, { "type": "loss", "content": 0.032305363565683365, "timestamp": "2025-09-10 02:26:50.711837", "step": 704, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.741072", "step": 704, "epoch": 1 }, { "type": "loss", "content": 0.008190718479454517, "timestamp": "2025-09-10 02:26:50.742721", "step": 705, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.770795", "step": 705, "epoch": 1 }, { "type": "loss", "content": 0.03497549146413803, "timestamp": "2025-09-10 02:26:50.772293", "step": 706, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.800601", "step": 706, "epoch": 1 }, { "type": "loss", "content": 0.032218027859926224, "timestamp": "2025-09-10 02:26:50.801985", "step": 707, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.830096", "step": 707, "epoch": 1 }, { "type": "loss", "content": 0.02115151286125183, "timestamp": "2025-09-10 02:26:50.853368", "step": 708, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:50.881547", "step": 708, "epoch": 1 }, { "type": "loss", "content": 0.03463287279009819, "timestamp": "2025-09-10 02:26:50.883151", "step": 709, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.911659", "step": 709, "epoch": 1 }, { "type": "loss", "content": 0.0242976825684309, "timestamp": "2025-09-10 02:26:50.913106", "step": 710, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.941378", "step": 710, "epoch": 1 }, { "type": "loss", "content": 0.0387067086994648, "timestamp": "2025-09-10 02:26:50.943066", "step": 711, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:50.971672", "step": 711, "epoch": 1 }, { "type": "loss", "content": 0.0348440445959568, "timestamp": "2025-09-10 02:26:50.994804", "step": 712, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.023788", "step": 712, "epoch": 1 }, { "type": "loss", "content": 0.06683380901813507, "timestamp": "2025-09-10 02:26:51.025261", "step": 713, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.053725", "step": 713, "epoch": 1 }, { "type": "loss", "content": 0.020580554381012917, "timestamp": "2025-09-10 02:26:51.055205", "step": 714, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:51.083768", "step": 714, "epoch": 1 }, { "type": "loss", "content": 0.0237856637686491, "timestamp": "2025-09-10 02:26:51.085241", "step": 715, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.113530", "step": 715, "epoch": 1 }, { "type": "loss", "content": 0.013393201865255833, "timestamp": "2025-09-10 02:26:51.136848", "step": 716, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.165110", "step": 716, "epoch": 1 }, { "type": "loss", "content": 0.037130821496248245, "timestamp": "2025-09-10 02:26:51.166682", "step": 717, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.194854", "step": 717, "epoch": 1 }, { "type": "loss", "content": 0.03772103041410446, "timestamp": "2025-09-10 02:26:51.196396", "step": 718, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.224736", "step": 718, "epoch": 1 }, { "type": "loss", "content": 0.012475842610001564, "timestamp": "2025-09-10 02:26:51.226341", "step": 719, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:51.255110", "step": 719, "epoch": 1 }, { "type": "loss", "content": 0.03414268419146538, "timestamp": "2025-09-10 02:26:51.278112", "step": 720, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:51.306978", "step": 720, "epoch": 1 }, { "type": "loss", "content": 0.05466434359550476, "timestamp": "2025-09-10 02:26:51.308617", "step": 721, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.337372", "step": 721, "epoch": 1 }, { "type": "loss", "content": 0.04362777993083, "timestamp": "2025-09-10 02:26:51.339113", "step": 722, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:51.367864", "step": 722, "epoch": 1 }, { "type": "loss", "content": 0.028265880420804024, "timestamp": "2025-09-10 02:26:51.369365", "step": 723, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.397803", "step": 723, "epoch": 1 }, { "type": "loss", "content": 0.007118005305528641, "timestamp": "2025-09-10 02:26:51.420943", "step": 724, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.449569", "step": 724, "epoch": 1 }, { "type": "loss", "content": 0.03422413393855095, "timestamp": "2025-09-10 02:26:51.451038", "step": 725, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.479143", "step": 725, "epoch": 1 }, { "type": "loss", "content": 0.03792322799563408, "timestamp": "2025-09-10 02:26:51.480799", "step": 726, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.508998", "step": 726, "epoch": 1 }, { "type": "loss", "content": 0.04148111492395401, "timestamp": "2025-09-10 02:26:51.510379", "step": 727, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.538681", "step": 727, "epoch": 1 }, { "type": "loss", "content": 0.05498867854475975, "timestamp": "2025-09-10 02:26:51.561876", "step": 728, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.590170", "step": 728, "epoch": 1 }, { "type": "loss", "content": 0.04803735017776489, "timestamp": "2025-09-10 02:26:51.591844", "step": 729, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.620032", "step": 729, "epoch": 1 }, { "type": "loss", "content": 0.02832743152976036, "timestamp": "2025-09-10 02:26:51.621455", "step": 730, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.649972", "step": 730, "epoch": 1 }, { "type": "loss", "content": 0.02841213345527649, "timestamp": "2025-09-10 02:26:51.651532", "step": 731, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.679910", "step": 731, "epoch": 1 }, { "type": "loss", "content": 0.035199183970689774, "timestamp": "2025-09-10 02:26:51.702997", "step": 732, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.732103", "step": 732, "epoch": 1 }, { "type": "loss", "content": 0.03662240505218506, "timestamp": "2025-09-10 02:26:51.733618", "step": 733, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.761974", "step": 733, "epoch": 1 }, { "type": "loss", "content": 0.01077954936772585, "timestamp": "2025-09-10 02:26:51.763242", "step": 734, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.791632", "step": 734, "epoch": 1 }, { "type": "loss", "content": 0.02980462647974491, "timestamp": "2025-09-10 02:26:51.793223", "step": 735, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.822116", "step": 735, "epoch": 1 }, { "type": "loss", "content": 0.04685065522789955, "timestamp": "2025-09-10 02:26:51.845259", "step": 736, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.873383", "step": 736, "epoch": 1 }, { "type": "loss", "content": 0.02059471607208252, "timestamp": "2025-09-10 02:26:51.875008", "step": 737, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:51.903465", "step": 737, "epoch": 1 }, { "type": "loss", "content": 0.016841420903801918, "timestamp": "2025-09-10 02:26:51.905080", "step": 738, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.933906", "step": 738, "epoch": 1 }, { "type": "loss", "content": 0.022859051823616028, "timestamp": "2025-09-10 02:26:51.935583", "step": 739, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:51.964162", "step": 739, "epoch": 1 }, { "type": "loss", "content": 0.015584629960358143, "timestamp": "2025-09-10 02:26:51.987156", "step": 740, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:52.015724", "step": 740, "epoch": 1 }, { "type": "loss", "content": 0.0430554524064064, "timestamp": "2025-09-10 02:26:52.017231", "step": 741, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.045908", "step": 741, "epoch": 1 }, { "type": "loss", "content": 0.05952511355280876, "timestamp": "2025-09-10 02:26:52.047394", "step": 742, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.075681", "step": 742, "epoch": 1 }, { "type": "loss", "content": 0.0038539913948625326, "timestamp": "2025-09-10 02:26:52.077220", "step": 743, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.105938", "step": 743, "epoch": 1 }, { "type": "loss", "content": 0.029965851455926895, "timestamp": "2025-09-10 02:26:52.128888", "step": 744, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.157289", "step": 744, "epoch": 1 }, { "type": "loss", "content": 0.07923837006092072, "timestamp": "2025-09-10 02:26:52.158999", "step": 745, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.188457", "step": 745, "epoch": 1 }, { "type": "loss", "content": 0.01898501254618168, "timestamp": "2025-09-10 02:26:52.190365", "step": 746, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.219837", "step": 746, "epoch": 1 }, { "type": "loss", "content": 0.02514803595840931, "timestamp": "2025-09-10 02:26:52.221387", "step": 747, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.250204", "step": 747, "epoch": 1 }, { "type": "loss", "content": 0.04136563092470169, "timestamp": "2025-09-10 02:26:52.273373", "step": 748, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.301589", "step": 748, "epoch": 1 }, { "type": "loss", "content": 0.03028266690671444, "timestamp": "2025-09-10 02:26:52.303179", "step": 749, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:52.331645", "step": 749, "epoch": 1 }, { "type": "loss", "content": 0.04092634096741676, "timestamp": "2025-09-10 02:26:52.332998", "step": 750, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.361165", "step": 750, "epoch": 1 }, { "type": "loss", "content": 0.06992150843143463, "timestamp": "2025-09-10 02:26:52.362733", "step": 751, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.390625", "step": 751, "epoch": 1 }, { "type": "loss", "content": 0.015275141224265099, "timestamp": "2025-09-10 02:26:52.413551", "step": 752, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.442333", "step": 752, "epoch": 1 }, { "type": "loss", "content": 0.01683962717652321, "timestamp": "2025-09-10 02:26:52.443854", "step": 753, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.472139", "step": 753, "epoch": 1 }, { "type": "loss", "content": 0.035905104130506516, "timestamp": "2025-09-10 02:26:52.473531", "step": 754, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.501697", "step": 754, "epoch": 1 }, { "type": "loss", "content": 0.04694758728146553, "timestamp": "2025-09-10 02:26:52.503196", "step": 755, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.531662", "step": 755, "epoch": 1 }, { "type": "loss", "content": 0.029720274731516838, "timestamp": "2025-09-10 02:26:52.554611", "step": 756, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.583282", "step": 756, "epoch": 1 }, { "type": "loss", "content": 0.02850966341793537, "timestamp": "2025-09-10 02:26:52.584859", "step": 757, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.612987", "step": 757, "epoch": 1 }, { "type": "loss", "content": 0.07100068032741547, "timestamp": "2025-09-10 02:26:52.614646", "step": 758, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.643105", "step": 758, "epoch": 1 }, { "type": "loss", "content": 0.008930629119277, "timestamp": "2025-09-10 02:26:52.644591", "step": 759, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:52.673084", "step": 759, "epoch": 1 }, { "type": "loss", "content": 0.05873224884271622, "timestamp": "2025-09-10 02:26:52.696219", "step": 760, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:26:54.548540", "step": 760, "epoch": 1 }, { "type": "pplx", "content": 2503157.170217035, "timestamp": "2025-09-10 02:26:54.550020", "step": 760, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.577915", "step": 760, "epoch": 1 }, { "type": "loss", "content": 0.020794304087758064, "timestamp": "2025-09-10 02:26:54.579372", "step": 761, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.608202", "step": 761, "epoch": 1 }, { "type": "loss", "content": 0.02302137203514576, "timestamp": "2025-09-10 02:26:54.609839", "step": 762, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:54.638283", "step": 762, "epoch": 1 }, { "type": "loss", "content": 0.049961064010858536, "timestamp": "2025-09-10 02:26:54.639681", "step": 763, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.667812", "step": 763, "epoch": 1 }, { "type": "loss", "content": 0.07963868230581284, "timestamp": "2025-09-10 02:26:54.690824", "step": 764, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.719395", "step": 764, "epoch": 1 }, { "type": "loss", "content": 0.03662387281656265, "timestamp": "2025-09-10 02:26:54.720937", "step": 765, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.749291", "step": 765, "epoch": 1 }, { "type": "loss", "content": 0.043657511472702026, "timestamp": "2025-09-10 02:26:54.750756", "step": 766, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.779230", "step": 766, "epoch": 1 }, { "type": "loss", "content": 0.03749134764075279, "timestamp": "2025-09-10 02:26:54.780610", "step": 767, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.808711", "step": 767, "epoch": 1 }, { "type": "loss", "content": 0.025386493653059006, "timestamp": "2025-09-10 02:26:54.831874", "step": 768, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.860558", "step": 768, "epoch": 1 }, { "type": "loss", "content": 0.01552271656692028, "timestamp": "2025-09-10 02:26:54.862021", "step": 769, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.890902", "step": 769, "epoch": 1 }, { "type": "loss", "content": 0.036365240812301636, "timestamp": "2025-09-10 02:26:54.892427", "step": 770, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.921241", "step": 770, "epoch": 1 }, { "type": "loss", "content": 0.008484335616230965, "timestamp": "2025-09-10 02:26:54.922839", "step": 771, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:54.951076", "step": 771, "epoch": 1 }, { "type": "loss", "content": 0.04429873079061508, "timestamp": "2025-09-10 02:26:54.974195", "step": 772, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.002711", "step": 772, "epoch": 1 }, { "type": "loss", "content": 0.02399025857448578, "timestamp": "2025-09-10 02:26:55.004149", "step": 773, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.032233", "step": 773, "epoch": 1 }, { "type": "loss", "content": 0.036831699311733246, "timestamp": "2025-09-10 02:26:55.033645", "step": 774, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.061729", "step": 774, "epoch": 1 }, { "type": "loss", "content": 0.019087400287389755, "timestamp": "2025-09-10 02:26:55.063295", "step": 775, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.091778", "step": 775, "epoch": 1 }, { "type": "loss", "content": 0.009040270932018757, "timestamp": "2025-09-10 02:26:55.114634", "step": 776, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.143205", "step": 776, "epoch": 1 }, { "type": "loss", "content": 0.015383010730147362, "timestamp": "2025-09-10 02:26:55.144759", "step": 777, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:55.172903", "step": 777, "epoch": 1 }, { "type": "loss", "content": 0.022596066817641258, "timestamp": "2025-09-10 02:26:55.174376", "step": 778, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.203278", "step": 778, "epoch": 1 }, { "type": "loss", "content": 0.08443605154752731, "timestamp": "2025-09-10 02:26:55.204576", "step": 779, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.232936", "step": 779, "epoch": 1 }, { "type": "loss", "content": 0.01622471585869789, "timestamp": "2025-09-10 02:26:55.255979", "step": 780, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.284729", "step": 780, "epoch": 1 }, { "type": "loss", "content": 0.041626691818237305, "timestamp": "2025-09-10 02:26:55.286156", "step": 781, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:55.314236", "step": 781, "epoch": 1 }, { "type": "loss", "content": 0.0428607352077961, "timestamp": "2025-09-10 02:26:55.315703", "step": 782, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.343806", "step": 782, "epoch": 1 }, { "type": "loss", "content": 0.03353489935398102, "timestamp": "2025-09-10 02:26:55.345389", "step": 783, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.374106", "step": 783, "epoch": 1 }, { "type": "loss", "content": 0.013459078967571259, "timestamp": "2025-09-10 02:26:55.397388", "step": 784, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.426018", "step": 784, "epoch": 1 }, { "type": "loss", "content": 0.014677911065518856, "timestamp": "2025-09-10 02:26:55.427567", "step": 785, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.455690", "step": 785, "epoch": 1 }, { "type": "loss", "content": 0.0209831353276968, "timestamp": "2025-09-10 02:26:55.456974", "step": 786, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.484965", "step": 786, "epoch": 1 }, { "type": "loss", "content": 0.036396000534296036, "timestamp": "2025-09-10 02:26:55.486602", "step": 787, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.514945", "step": 787, "epoch": 1 }, { "type": "loss", "content": 0.03313400223851204, "timestamp": "2025-09-10 02:26:55.537842", "step": 788, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.566416", "step": 788, "epoch": 1 }, { "type": "loss", "content": 0.022417547181248665, "timestamp": "2025-09-10 02:26:55.567848", "step": 789, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.596416", "step": 789, "epoch": 1 }, { "type": "loss", "content": 0.016005048528313637, "timestamp": "2025-09-10 02:26:55.597973", "step": 790, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.626100", "step": 790, "epoch": 1 }, { "type": "loss", "content": 0.049527037888765335, "timestamp": "2025-09-10 02:26:55.627650", "step": 791, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:55.655576", "step": 791, "epoch": 1 }, { "type": "loss", "content": 0.04059341177344322, "timestamp": "2025-09-10 02:26:55.678702", "step": 792, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:55.707378", "step": 792, "epoch": 1 }, { "type": "loss", "content": 0.04651249572634697, "timestamp": "2025-09-10 02:26:55.708942", "step": 793, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.737621", "step": 793, "epoch": 1 }, { "type": "loss", "content": 0.04483857750892639, "timestamp": "2025-09-10 02:26:55.739355", "step": 794, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.770030", "step": 794, "epoch": 1 }, { "type": "loss", "content": 0.007778738159686327, "timestamp": "2025-09-10 02:26:55.771432", "step": 795, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.800335", "step": 795, "epoch": 1 }, { "type": "loss", "content": 0.011393926106393337, "timestamp": "2025-09-10 02:26:55.823834", "step": 796, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:55.853266", "step": 796, "epoch": 1 }, { "type": "loss", "content": 0.02515154890716076, "timestamp": "2025-09-10 02:26:55.854858", "step": 797, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.883187", "step": 797, "epoch": 1 }, { "type": "loss", "content": 0.034160640090703964, "timestamp": "2025-09-10 02:26:55.884661", "step": 798, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:55.913163", "step": 798, "epoch": 1 }, { "type": "loss", "content": 0.010150982066988945, "timestamp": "2025-09-10 02:26:55.914951", "step": 799, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.943338", "step": 799, "epoch": 1 }, { "type": "loss", "content": 0.013213365338742733, "timestamp": "2025-09-10 02:26:55.966528", "step": 800, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:55.995288", "step": 800, "epoch": 1 }, { "type": "loss", "content": 0.005620269570499659, "timestamp": "2025-09-10 02:26:55.996766", "step": 801, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.025235", "step": 801, "epoch": 1 }, { "type": "loss", "content": 0.0072153410874307156, "timestamp": "2025-09-10 02:26:56.026799", "step": 802, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:56.054985", "step": 802, "epoch": 1 }, { "type": "loss", "content": 0.026846928521990776, "timestamp": "2025-09-10 02:26:56.056759", "step": 803, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.084982", "step": 803, "epoch": 1 }, { "type": "loss", "content": 0.04960143193602562, "timestamp": "2025-09-10 02:26:56.107994", "step": 804, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.136673", "step": 804, "epoch": 1 }, { "type": "loss", "content": 0.0130831403657794, "timestamp": "2025-09-10 02:26:56.138023", "step": 805, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:56.166808", "step": 805, "epoch": 1 }, { "type": "loss", "content": 0.025565098971128464, "timestamp": "2025-09-10 02:26:56.168815", "step": 806, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.197806", "step": 806, "epoch": 1 }, { "type": "loss", "content": 0.027919035404920578, "timestamp": "2025-09-10 02:26:56.199264", "step": 807, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.227908", "step": 807, "epoch": 1 }, { "type": "loss", "content": 0.05793684348464012, "timestamp": "2025-09-10 02:26:56.251174", "step": 808, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.279689", "step": 808, "epoch": 1 }, { "type": "loss", "content": 0.04225584864616394, "timestamp": "2025-09-10 02:26:56.281236", "step": 809, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.311362", "step": 809, "epoch": 1 }, { "type": "loss", "content": 0.01058135274797678, "timestamp": "2025-09-10 02:26:56.313019", "step": 810, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.342087", "step": 810, "epoch": 1 }, { "type": "loss", "content": 0.0357813760638237, "timestamp": "2025-09-10 02:26:56.344028", "step": 811, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.373216", "step": 811, "epoch": 1 }, { "type": "loss", "content": 0.025642748922109604, "timestamp": "2025-09-10 02:26:56.396589", "step": 812, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.425545", "step": 812, "epoch": 1 }, { "type": "loss", "content": 0.024076486006379128, "timestamp": "2025-09-10 02:26:56.427250", "step": 813, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.455943", "step": 813, "epoch": 1 }, { "type": "loss", "content": 0.026659991592168808, "timestamp": "2025-09-10 02:26:56.457961", "step": 814, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.486793", "step": 814, "epoch": 1 }, { "type": "loss", "content": 0.005625136662274599, "timestamp": "2025-09-10 02:26:56.489885", "step": 815, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:56.521733", "step": 815, "epoch": 1 }, { "type": "loss", "content": 0.05507013946771622, "timestamp": "2025-09-10 02:26:56.545080", "step": 816, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.574042", "step": 816, "epoch": 1 }, { "type": "loss", "content": 0.04457852989435196, "timestamp": "2025-09-10 02:26:56.575662", "step": 817, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.603926", "step": 817, "epoch": 1 }, { "type": "loss", "content": 0.01740320399403572, "timestamp": "2025-09-10 02:26:56.605442", "step": 818, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.633790", "step": 818, "epoch": 1 }, { "type": "loss", "content": 0.011634668335318565, "timestamp": "2025-09-10 02:26:56.635563", "step": 819, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.664053", "step": 819, "epoch": 1 }, { "type": "loss", "content": 0.0116079431027174, "timestamp": "2025-09-10 02:26:56.687293", "step": 820, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.715854", "step": 820, "epoch": 1 }, { "type": "loss", "content": 0.035347651690244675, "timestamp": "2025-09-10 02:26:56.717193", "step": 821, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:56.745555", "step": 821, "epoch": 1 }, { "type": "loss", "content": 0.0406646691262722, "timestamp": "2025-09-10 02:26:56.746890", "step": 822, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.775320", "step": 822, "epoch": 1 }, { "type": "loss", "content": 0.003157601458951831, "timestamp": "2025-09-10 02:26:56.776707", "step": 823, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.805168", "step": 823, "epoch": 1 }, { "type": "loss", "content": 0.0253426693379879, "timestamp": "2025-09-10 02:26:56.828211", "step": 824, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.856754", "step": 824, "epoch": 1 }, { "type": "loss", "content": 0.03272155672311783, "timestamp": "2025-09-10 02:26:56.858472", "step": 825, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:56.887504", "step": 825, "epoch": 1 }, { "type": "loss", "content": 0.03958987444639206, "timestamp": "2025-09-10 02:26:56.889192", "step": 826, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:56.918050", "step": 826, "epoch": 1 }, { "type": "loss", "content": 0.019212845712900162, "timestamp": "2025-09-10 02:26:56.919780", "step": 827, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:56.948559", "step": 827, "epoch": 1 }, { "type": "loss", "content": 0.007750155869871378, "timestamp": "2025-09-10 02:26:56.971964", "step": 828, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:57.000895", "step": 828, "epoch": 1 }, { "type": "loss", "content": 0.046940695494413376, "timestamp": "2025-09-10 02:26:57.002562", "step": 829, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.031079", "step": 829, "epoch": 1 }, { "type": "loss", "content": 0.06961827725172043, "timestamp": "2025-09-10 02:26:57.032686", "step": 830, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.060940", "step": 830, "epoch": 1 }, { "type": "loss", "content": 0.04929071292281151, "timestamp": "2025-09-10 02:26:57.062552", "step": 831, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:57.090902", "step": 831, "epoch": 1 }, { "type": "loss", "content": 0.028989458456635475, "timestamp": "2025-09-10 02:26:57.114201", "step": 832, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.142375", "step": 832, "epoch": 1 }, { "type": "loss", "content": 0.06238769739866257, "timestamp": "2025-09-10 02:26:57.143794", "step": 833, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.172558", "step": 833, "epoch": 1 }, { "type": "loss", "content": 0.032351333647966385, "timestamp": "2025-09-10 02:26:57.173915", "step": 834, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.202245", "step": 834, "epoch": 1 }, { "type": "loss", "content": 0.05260155349969864, "timestamp": "2025-09-10 02:26:57.203623", "step": 835, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.231949", "step": 835, "epoch": 1 }, { "type": "loss", "content": 0.07337234914302826, "timestamp": "2025-09-10 02:26:57.254968", "step": 836, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.283641", "step": 836, "epoch": 1 }, { "type": "loss", "content": 0.02990635298192501, "timestamp": "2025-09-10 02:26:57.284964", "step": 837, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.313134", "step": 837, "epoch": 1 }, { "type": "loss", "content": 0.011495047248899937, "timestamp": "2025-09-10 02:26:57.314780", "step": 838, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:57.344778", "step": 838, "epoch": 1 }, { "type": "loss", "content": 0.07979269325733185, "timestamp": "2025-09-10 02:26:57.346378", "step": 839, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.375843", "step": 839, "epoch": 1 }, { "type": "loss", "content": 0.007071019150316715, "timestamp": "2025-09-10 02:26:57.399071", "step": 840, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.427834", "step": 840, "epoch": 1 }, { "type": "loss", "content": 0.028329063206911087, "timestamp": "2025-09-10 02:26:57.429384", "step": 841, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.457911", "step": 841, "epoch": 1 }, { "type": "loss", "content": 0.0028442800976336002, "timestamp": "2025-09-10 02:26:57.459590", "step": 842, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.488580", "step": 842, "epoch": 1 }, { "type": "loss", "content": 0.004879837390035391, "timestamp": "2025-09-10 02:26:57.490514", "step": 843, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.519550", "step": 843, "epoch": 1 }, { "type": "loss", "content": 0.03970589488744736, "timestamp": "2025-09-10 02:26:57.542861", "step": 844, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:26:57.572161", "step": 844, "epoch": 1 }, { "type": "loss", "content": 0.011552110314369202, "timestamp": "2025-09-10 02:26:57.573779", "step": 845, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.602524", "step": 845, "epoch": 1 }, { "type": "loss", "content": 0.005572037305682898, "timestamp": "2025-09-10 02:26:57.604230", "step": 846, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.632628", "step": 846, "epoch": 1 }, { "type": "loss", "content": 0.024660101160407066, "timestamp": "2025-09-10 02:26:57.634239", "step": 847, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.662928", "step": 847, "epoch": 1 }, { "type": "loss", "content": 0.010990379378199577, "timestamp": "2025-09-10 02:26:57.686262", "step": 848, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.715047", "step": 848, "epoch": 1 }, { "type": "loss", "content": 0.02281826175749302, "timestamp": "2025-09-10 02:26:57.716448", "step": 849, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.744947", "step": 849, "epoch": 1 }, { "type": "loss", "content": 0.05251472443342209, "timestamp": "2025-09-10 02:26:57.746690", "step": 850, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.775116", "step": 850, "epoch": 1 }, { "type": "loss", "content": 0.001956451218575239, "timestamp": "2025-09-10 02:26:57.776778", "step": 851, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.805487", "step": 851, "epoch": 1 }, { "type": "loss", "content": 0.03003103658556938, "timestamp": "2025-09-10 02:26:57.828694", "step": 852, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:57.857641", "step": 852, "epoch": 1 }, { "type": "loss", "content": 0.038373447954654694, "timestamp": "2025-09-10 02:26:57.859206", "step": 853, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.887907", "step": 853, "epoch": 1 }, { "type": "loss", "content": 0.017338624224066734, "timestamp": "2025-09-10 02:26:57.889383", "step": 854, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.917855", "step": 854, "epoch": 1 }, { "type": "loss", "content": 0.028965888544917107, "timestamp": "2025-09-10 02:26:57.919217", "step": 855, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:57.947750", "step": 855, "epoch": 1 }, { "type": "loss", "content": 0.01129152625799179, "timestamp": "2025-09-10 02:26:57.971105", "step": 856, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:57.999907", "step": 856, "epoch": 1 }, { "type": "loss", "content": 0.018316298723220825, "timestamp": "2025-09-10 02:26:58.001399", "step": 857, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.029631", "step": 857, "epoch": 1 }, { "type": "loss", "content": 0.040882717818021774, "timestamp": "2025-09-10 02:26:58.031262", "step": 858, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.059571", "step": 858, "epoch": 1 }, { "type": "loss", "content": 0.04055514559149742, "timestamp": "2025-09-10 02:26:58.061167", "step": 859, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:58.089540", "step": 859, "epoch": 1 }, { "type": "loss", "content": 0.08326940983533859, "timestamp": "2025-09-10 02:26:58.112736", "step": 860, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.141039", "step": 860, "epoch": 1 }, { "type": "loss", "content": 0.04865090176463127, "timestamp": "2025-09-10 02:26:58.142449", "step": 861, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.171250", "step": 861, "epoch": 1 }, { "type": "loss", "content": 0.016379257664084435, "timestamp": "2025-09-10 02:26:58.172849", "step": 862, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:58.201208", "step": 862, "epoch": 1 }, { "type": "loss", "content": 0.07710827142000198, "timestamp": "2025-09-10 02:26:58.202791", "step": 863, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.230971", "step": 863, "epoch": 1 }, { "type": "loss", "content": 0.027466127648949623, "timestamp": "2025-09-10 02:26:58.254276", "step": 864, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.282593", "step": 864, "epoch": 1 }, { "type": "loss", "content": 0.03628736361861229, "timestamp": "2025-09-10 02:26:58.284275", "step": 865, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.313466", "step": 865, "epoch": 1 }, { "type": "loss", "content": 0.026968399062752724, "timestamp": "2025-09-10 02:26:58.315050", "step": 866, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.343378", "step": 866, "epoch": 1 }, { "type": "loss", "content": 0.041775938123464584, "timestamp": "2025-09-10 02:26:58.345033", "step": 867, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.373602", "step": 867, "epoch": 1 }, { "type": "loss", "content": 0.047613587230443954, "timestamp": "2025-09-10 02:26:58.396737", "step": 868, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.425338", "step": 868, "epoch": 1 }, { "type": "loss", "content": 0.05883047729730606, "timestamp": "2025-09-10 02:26:58.426964", "step": 869, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.455404", "step": 869, "epoch": 1 }, { "type": "loss", "content": 0.06252393126487732, "timestamp": "2025-09-10 02:26:58.456885", "step": 870, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.484735", "step": 870, "epoch": 1 }, { "type": "loss", "content": 0.015127004124224186, "timestamp": "2025-09-10 02:26:58.485914", "step": 871, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.514780", "step": 871, "epoch": 1 }, { "type": "loss", "content": 0.01448144856840372, "timestamp": "2025-09-10 02:26:58.539561", "step": 872, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:26:58.572959", "step": 872, "epoch": 1 }, { "type": "loss", "content": 0.06627927720546722, "timestamp": "2025-09-10 02:26:58.574374", "step": 873, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.603531", "step": 873, "epoch": 1 }, { "type": "loss", "content": 0.019005369395017624, "timestamp": "2025-09-10 02:26:58.605178", "step": 874, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.634262", "step": 874, "epoch": 1 }, { "type": "loss", "content": 0.02605721540749073, "timestamp": "2025-09-10 02:26:58.636434", "step": 875, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.664789", "step": 875, "epoch": 1 }, { "type": "loss", "content": 0.03991827368736267, "timestamp": "2025-09-10 02:26:58.688019", "step": 876, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:58.716716", "step": 876, "epoch": 1 }, { "type": "loss", "content": 0.03819936141371727, "timestamp": "2025-09-10 02:26:58.718837", "step": 877, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.747422", "step": 877, "epoch": 1 }, { "type": "loss", "content": 0.0238080732524395, "timestamp": "2025-09-10 02:26:58.749301", "step": 878, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.778063", "step": 878, "epoch": 1 }, { "type": "loss", "content": 0.023295262828469276, "timestamp": "2025-09-10 02:26:58.782160", "step": 879, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.814474", "step": 879, "epoch": 1 }, { "type": "loss", "content": 0.043232087045907974, "timestamp": "2025-09-10 02:26:58.837418", "step": 880, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.866795", "step": 880, "epoch": 1 }, { "type": "loss", "content": 0.03321755677461624, "timestamp": "2025-09-10 02:26:58.867959", "step": 881, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.918272", "step": 881, "epoch": 1 }, { "type": "loss", "content": 0.03598690778017044, "timestamp": "2025-09-10 02:26:58.919779", "step": 882, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.959653", "step": 882, "epoch": 1 }, { "type": "loss", "content": 0.032034873962402344, "timestamp": "2025-09-10 02:26:58.961079", "step": 883, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:58.989528", "step": 883, "epoch": 1 }, { "type": "loss", "content": 0.024193478748202324, "timestamp": "2025-09-10 02:26:59.019158", "step": 884, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.048228", "step": 884, "epoch": 1 }, { "type": "loss", "content": 0.02453945204615593, "timestamp": "2025-09-10 02:26:59.049847", "step": 885, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.078136", "step": 885, "epoch": 1 }, { "type": "loss", "content": 0.021976953372359276, "timestamp": "2025-09-10 02:26:59.079740", "step": 886, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.108089", "step": 886, "epoch": 1 }, { "type": "loss", "content": 0.029496390372514725, "timestamp": "2025-09-10 02:26:59.109965", "step": 887, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.138397", "step": 887, "epoch": 1 }, { "type": "loss", "content": 0.04946603253483772, "timestamp": "2025-09-10 02:26:59.161913", "step": 888, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.190814", "step": 888, "epoch": 1 }, { "type": "loss", "content": 0.05130714550614357, "timestamp": "2025-09-10 02:26:59.192630", "step": 889, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.229788", "step": 889, "epoch": 1 }, { "type": "loss", "content": 0.008475149050354958, "timestamp": "2025-09-10 02:26:59.231398", "step": 890, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:26:59.279503", "step": 890, "epoch": 1 }, { "type": "loss", "content": 0.06713636219501495, "timestamp": "2025-09-10 02:26:59.281228", "step": 891, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.309801", "step": 891, "epoch": 1 }, { "type": "loss", "content": 0.010133733041584492, "timestamp": "2025-09-10 02:26:59.333035", "step": 892, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.361789", "step": 892, "epoch": 1 }, { "type": "loss", "content": 0.016804341226816177, "timestamp": "2025-09-10 02:26:59.363385", "step": 893, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.391648", "step": 893, "epoch": 1 }, { "type": "loss", "content": 0.04320318624377251, "timestamp": "2025-09-10 02:26:59.393321", "step": 894, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.421898", "step": 894, "epoch": 1 }, { "type": "loss", "content": 0.05202284827828407, "timestamp": "2025-09-10 02:26:59.423531", "step": 895, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.456764", "step": 895, "epoch": 1 }, { "type": "loss", "content": 0.039947301149368286, "timestamp": "2025-09-10 02:26:59.480192", "step": 896, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.508979", "step": 896, "epoch": 1 }, { "type": "loss", "content": 0.04276236519217491, "timestamp": "2025-09-10 02:26:59.510718", "step": 897, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:59.538986", "step": 897, "epoch": 1 }, { "type": "loss", "content": 0.05527576431632042, "timestamp": "2025-09-10 02:26:59.540380", "step": 898, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.568843", "step": 898, "epoch": 1 }, { "type": "loss", "content": 0.04893497750163078, "timestamp": "2025-09-10 02:26:59.570093", "step": 899, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.598704", "step": 899, "epoch": 1 }, { "type": "loss", "content": 0.036729227751493454, "timestamp": "2025-09-10 02:26:59.621797", "step": 900, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.650523", "step": 900, "epoch": 1 }, { "type": "loss", "content": 0.013712167739868164, "timestamp": "2025-09-10 02:26:59.652162", "step": 901, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.680523", "step": 901, "epoch": 1 }, { "type": "loss", "content": 0.04306911304593086, "timestamp": "2025-09-10 02:26:59.682064", "step": 902, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.710335", "step": 902, "epoch": 1 }, { "type": "loss", "content": 0.027140114456415176, "timestamp": "2025-09-10 02:26:59.711924", "step": 903, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.740712", "step": 903, "epoch": 1 }, { "type": "loss", "content": 0.020640883594751358, "timestamp": "2025-09-10 02:26:59.763980", "step": 904, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.792590", "step": 904, "epoch": 1 }, { "type": "loss", "content": 0.022836310788989067, "timestamp": "2025-09-10 02:26:59.794209", "step": 905, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.823280", "step": 905, "epoch": 1 }, { "type": "loss", "content": 0.03370488062500954, "timestamp": "2025-09-10 02:26:59.824985", "step": 906, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.853309", "step": 906, "epoch": 1 }, { "type": "loss", "content": 0.04225897043943405, "timestamp": "2025-09-10 02:26:59.855144", "step": 907, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.883893", "step": 907, "epoch": 1 }, { "type": "loss", "content": 0.008355808444321156, "timestamp": "2025-09-10 02:26:59.907023", "step": 908, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.935479", "step": 908, "epoch": 1 }, { "type": "loss", "content": 0.04681595414876938, "timestamp": "2025-09-10 02:26:59.937251", "step": 909, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:26:59.965718", "step": 909, "epoch": 1 }, { "type": "loss", "content": 0.03790203854441643, "timestamp": "2025-09-10 02:26:59.967415", "step": 910, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:26:59.996145", "step": 910, "epoch": 1 }, { "type": "loss", "content": 0.0279624555259943, "timestamp": "2025-09-10 02:26:59.997857", "step": 911, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:00.026413", "step": 911, "epoch": 1 }, { "type": "loss", "content": 0.01219746470451355, "timestamp": "2025-09-10 02:27:00.049618", "step": 912, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:27:01.874939", "step": 912, "epoch": 1 }, { "type": "pplx", "content": 2265473.919981855, "timestamp": "2025-09-10 02:27:01.876535", "step": 912, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:01.904830", "step": 912, "epoch": 1 }, { "type": "loss", "content": 0.03966560214757919, "timestamp": "2025-09-10 02:27:01.906208", "step": 913, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:01.934727", "step": 913, "epoch": 1 }, { "type": "loss", "content": 0.03247629851102829, "timestamp": "2025-09-10 02:27:01.936280", "step": 914, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:01.964790", "step": 914, "epoch": 1 }, { "type": "loss", "content": 0.025539681315422058, "timestamp": "2025-09-10 02:27:01.966146", "step": 915, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:01.994786", "step": 915, "epoch": 1 }, { "type": "loss", "content": 0.03211888670921326, "timestamp": "2025-09-10 02:27:02.017800", "step": 916, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.046792", "step": 916, "epoch": 1 }, { "type": "loss", "content": 0.012743504717946053, "timestamp": "2025-09-10 02:27:02.048254", "step": 917, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.076895", "step": 917, "epoch": 1 }, { "type": "loss", "content": 0.03760813549160957, "timestamp": "2025-09-10 02:27:02.078501", "step": 918, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.108084", "step": 918, "epoch": 1 }, { "type": "loss", "content": 0.015784770250320435, "timestamp": "2025-09-10 02:27:02.109666", "step": 919, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.138145", "step": 919, "epoch": 1 }, { "type": "loss", "content": 0.044560663402080536, "timestamp": "2025-09-10 02:27:02.161404", "step": 920, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.190162", "step": 920, "epoch": 1 }, { "type": "loss", "content": 0.04900949448347092, "timestamp": "2025-09-10 02:27:02.191794", "step": 921, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.219935", "step": 921, "epoch": 1 }, { "type": "loss", "content": 0.018750814720988274, "timestamp": "2025-09-10 02:27:02.221506", "step": 922, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.249736", "step": 922, "epoch": 1 }, { "type": "loss", "content": 0.04206148535013199, "timestamp": "2025-09-10 02:27:02.251491", "step": 923, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:02.280172", "step": 923, "epoch": 1 }, { "type": "loss", "content": 0.021803708747029305, "timestamp": "2025-09-10 02:27:02.303488", "step": 924, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:02.332263", "step": 924, "epoch": 1 }, { "type": "loss", "content": 0.033080197870731354, "timestamp": "2025-09-10 02:27:02.334070", "step": 925, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.362784", "step": 925, "epoch": 1 }, { "type": "loss", "content": 0.014295650646090508, "timestamp": "2025-09-10 02:27:02.364554", "step": 926, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.392983", "step": 926, "epoch": 1 }, { "type": "loss", "content": 0.042352236807346344, "timestamp": "2025-09-10 02:27:02.394776", "step": 927, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.423097", "step": 927, "epoch": 1 }, { "type": "loss", "content": 0.04907870292663574, "timestamp": "2025-09-10 02:27:02.446331", "step": 928, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.474628", "step": 928, "epoch": 1 }, { "type": "loss", "content": 0.038371481001377106, "timestamp": "2025-09-10 02:27:02.477096", "step": 929, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.508432", "step": 929, "epoch": 1 }, { "type": "loss", "content": 0.049601972103118896, "timestamp": "2025-09-10 02:27:02.510210", "step": 930, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.538846", "step": 930, "epoch": 1 }, { "type": "loss", "content": 0.012743870727717876, "timestamp": "2025-09-10 02:27:02.540460", "step": 931, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.568749", "step": 931, "epoch": 1 }, { "type": "loss", "content": 0.06787528842687607, "timestamp": "2025-09-10 02:27:02.591908", "step": 932, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.620686", "step": 932, "epoch": 1 }, { "type": "loss", "content": 0.02233758009970188, "timestamp": "2025-09-10 02:27:02.622124", "step": 933, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.650189", "step": 933, "epoch": 1 }, { "type": "loss", "content": 0.023858215659856796, "timestamp": "2025-09-10 02:27:02.651791", "step": 934, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.680110", "step": 934, "epoch": 1 }, { "type": "loss", "content": 0.03377927467226982, "timestamp": "2025-09-10 02:27:02.681605", "step": 935, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.710383", "step": 935, "epoch": 1 }, { "type": "loss", "content": 0.05412615090608597, "timestamp": "2025-09-10 02:27:02.733453", "step": 936, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.761793", "step": 936, "epoch": 1 }, { "type": "loss", "content": 0.032133154571056366, "timestamp": "2025-09-10 02:27:02.763141", "step": 937, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.791620", "step": 937, "epoch": 1 }, { "type": "loss", "content": 0.04311128705739975, "timestamp": "2025-09-10 02:27:02.792882", "step": 938, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.820751", "step": 938, "epoch": 1 }, { "type": "loss", "content": 0.03060062788426876, "timestamp": "2025-09-10 02:27:02.822271", "step": 939, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.850794", "step": 939, "epoch": 1 }, { "type": "loss", "content": 0.031108910217881203, "timestamp": "2025-09-10 02:27:02.873909", "step": 940, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.902147", "step": 940, "epoch": 1 }, { "type": "loss", "content": 0.02943595126271248, "timestamp": "2025-09-10 02:27:02.903480", "step": 941, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.931606", "step": 941, "epoch": 1 }, { "type": "loss", "content": 0.022589508444070816, "timestamp": "2025-09-10 02:27:02.933020", "step": 942, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.960839", "step": 942, "epoch": 1 }, { "type": "loss", "content": 0.04562641307711601, "timestamp": "2025-09-10 02:27:02.962313", "step": 943, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:02.990669", "step": 943, "epoch": 1 }, { "type": "loss", "content": 0.016814980655908585, "timestamp": "2025-09-10 02:27:03.013643", "step": 944, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.041835", "step": 944, "epoch": 1 }, { "type": "loss", "content": 0.017725344747304916, "timestamp": "2025-09-10 02:27:03.043327", "step": 945, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:03.071660", "step": 945, "epoch": 1 }, { "type": "loss", "content": 0.014994575642049313, "timestamp": "2025-09-10 02:27:03.073009", "step": 946, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:03.101297", "step": 946, "epoch": 1 }, { "type": "loss", "content": 0.03003416769206524, "timestamp": "2025-09-10 02:27:03.102795", "step": 947, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:03.131022", "step": 947, "epoch": 1 }, { "type": "loss", "content": 0.055385202169418335, "timestamp": "2025-09-10 02:27:03.154195", "step": 948, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:03.182780", "step": 948, "epoch": 1 }, { "type": "loss", "content": 0.025065213441848755, "timestamp": "2025-09-10 02:27:03.184270", "step": 949, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.213047", "step": 949, "epoch": 1 }, { "type": "loss", "content": 0.025820758193731308, "timestamp": "2025-09-10 02:27:03.214556", "step": 950, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.243108", "step": 950, "epoch": 1 }, { "type": "loss", "content": 0.05613445118069649, "timestamp": "2025-09-10 02:27:03.244487", "step": 951, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:03.273004", "step": 951, "epoch": 1 }, { "type": "loss", "content": 0.0411837138235569, "timestamp": "2025-09-10 02:27:03.295895", "step": 952, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.324441", "step": 952, "epoch": 1 }, { "type": "loss", "content": 0.011896687559783459, "timestamp": "2025-09-10 02:27:03.326055", "step": 953, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.354239", "step": 953, "epoch": 1 }, { "type": "loss", "content": 0.0059830015525221825, "timestamp": "2025-09-10 02:27:03.355837", "step": 954, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.384199", "step": 954, "epoch": 1 }, { "type": "loss", "content": 0.03133850172162056, "timestamp": "2025-09-10 02:27:03.385963", "step": 955, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.413976", "step": 955, "epoch": 1 }, { "type": "loss", "content": 0.02864186279475689, "timestamp": "2025-09-10 02:27:03.436719", "step": 956, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.465165", "step": 956, "epoch": 1 }, { "type": "loss", "content": 0.0447709895670414, "timestamp": "2025-09-10 02:27:03.466830", "step": 957, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.494904", "step": 957, "epoch": 1 }, { "type": "loss", "content": 0.031331706792116165, "timestamp": "2025-09-10 02:27:03.496441", "step": 958, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.524983", "step": 958, "epoch": 1 }, { "type": "loss", "content": 0.023064518347382545, "timestamp": "2025-09-10 02:27:03.527064", "step": 959, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.555224", "step": 959, "epoch": 1 }, { "type": "loss", "content": 0.016874434426426888, "timestamp": "2025-09-10 02:27:03.578102", "step": 960, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:03.607090", "step": 960, "epoch": 1 }, { "type": "loss", "content": 0.01792900077998638, "timestamp": "2025-09-10 02:27:03.608488", "step": 961, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.637568", "step": 961, "epoch": 1 }, { "type": "loss", "content": 0.042464207857847214, "timestamp": "2025-09-10 02:27:03.639224", "step": 962, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.668193", "step": 962, "epoch": 1 }, { "type": "loss", "content": 0.018629251047968864, "timestamp": "2025-09-10 02:27:03.669950", "step": 963, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.699151", "step": 963, "epoch": 1 }, { "type": "loss", "content": 0.03737715631723404, "timestamp": "2025-09-10 02:27:03.722447", "step": 964, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.752221", "step": 964, "epoch": 1 }, { "type": "loss", "content": 0.06381294131278992, "timestamp": "2025-09-10 02:27:03.753951", "step": 965, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.782466", "step": 965, "epoch": 1 }, { "type": "loss", "content": 0.029692605137825012, "timestamp": "2025-09-10 02:27:03.784347", "step": 966, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:03.813007", "step": 966, "epoch": 1 }, { "type": "loss", "content": 0.04313949868083, "timestamp": "2025-09-10 02:27:03.814711", "step": 967, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.843096", "step": 967, "epoch": 1 }, { "type": "loss", "content": 0.016904447227716446, "timestamp": "2025-09-10 02:27:03.866313", "step": 968, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.895093", "step": 968, "epoch": 1 }, { "type": "loss", "content": 0.0268878061324358, "timestamp": "2025-09-10 02:27:03.896895", "step": 969, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.925440", "step": 969, "epoch": 1 }, { "type": "loss", "content": 0.012326344847679138, "timestamp": "2025-09-10 02:27:03.927384", "step": 970, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:03.955801", "step": 970, "epoch": 1 }, { "type": "loss", "content": 0.025940924882888794, "timestamp": "2025-09-10 02:27:03.957524", "step": 971, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:03.986154", "step": 971, "epoch": 1 }, { "type": "loss", "content": 0.015164701268076897, "timestamp": "2025-09-10 02:27:04.009311", "step": 972, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.037614", "step": 972, "epoch": 1 }, { "type": "loss", "content": 0.10002827644348145, "timestamp": "2025-09-10 02:27:04.039339", "step": 973, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.068016", "step": 973, "epoch": 1 }, { "type": "loss", "content": 0.040858518332242966, "timestamp": "2025-09-10 02:27:04.069827", "step": 974, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.098206", "step": 974, "epoch": 1 }, { "type": "loss", "content": 0.016241848468780518, "timestamp": "2025-09-10 02:27:04.099834", "step": 975, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.128428", "step": 975, "epoch": 1 }, { "type": "loss", "content": 0.07131864875555038, "timestamp": "2025-09-10 02:27:04.151640", "step": 976, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.180893", "step": 976, "epoch": 1 }, { "type": "loss", "content": 0.03664417937397957, "timestamp": "2025-09-10 02:27:04.182549", "step": 977, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.210595", "step": 977, "epoch": 1 }, { "type": "loss", "content": 0.050497375428676605, "timestamp": "2025-09-10 02:27:04.212159", "step": 978, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:04.240365", "step": 978, "epoch": 1 }, { "type": "loss", "content": 0.017245125025510788, "timestamp": "2025-09-10 02:27:04.242231", "step": 979, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.270994", "step": 979, "epoch": 1 }, { "type": "loss", "content": 0.04215339943766594, "timestamp": "2025-09-10 02:27:04.294105", "step": 980, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.322785", "step": 980, "epoch": 1 }, { "type": "loss", "content": 0.003192092990502715, "timestamp": "2025-09-10 02:27:04.324488", "step": 981, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.353422", "step": 981, "epoch": 1 }, { "type": "loss", "content": 0.02548823133111, "timestamp": "2025-09-10 02:27:04.355231", "step": 982, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.384531", "step": 982, "epoch": 1 }, { "type": "loss", "content": 0.017364230006933212, "timestamp": "2025-09-10 02:27:04.386432", "step": 983, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.415285", "step": 983, "epoch": 1 }, { "type": "loss", "content": 0.024783683940768242, "timestamp": "2025-09-10 02:27:04.438638", "step": 984, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.467099", "step": 984, "epoch": 1 }, { "type": "loss", "content": 0.03101974166929722, "timestamp": "2025-09-10 02:27:04.468706", "step": 985, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:04.497053", "step": 985, "epoch": 1 }, { "type": "loss", "content": 0.043494176119565964, "timestamp": "2025-09-10 02:27:04.498665", "step": 986, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.526859", "step": 986, "epoch": 1 }, { "type": "loss", "content": 0.010910848155617714, "timestamp": "2025-09-10 02:27:04.528682", "step": 987, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.558528", "step": 987, "epoch": 1 }, { "type": "loss", "content": 0.025911325588822365, "timestamp": "2025-09-10 02:27:04.581796", "step": 988, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.610727", "step": 988, "epoch": 1 }, { "type": "loss", "content": 0.0578092560172081, "timestamp": "2025-09-10 02:27:04.612286", "step": 989, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.641439", "step": 989, "epoch": 1 }, { "type": "loss", "content": 0.04395212605595589, "timestamp": "2025-09-10 02:27:04.643155", "step": 990, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:04.672103", "step": 990, "epoch": 1 }, { "type": "loss", "content": 0.015900081023573875, "timestamp": "2025-09-10 02:27:04.673889", "step": 991, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:04.702703", "step": 991, "epoch": 1 }, { "type": "loss", "content": 0.015407932922244072, "timestamp": "2025-09-10 02:27:04.725898", "step": 992, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.754367", "step": 992, "epoch": 1 }, { "type": "loss", "content": 0.07063453644514084, "timestamp": "2025-09-10 02:27:04.755996", "step": 993, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.784303", "step": 993, "epoch": 1 }, { "type": "loss", "content": 0.031969036906957626, "timestamp": "2025-09-10 02:27:04.785958", "step": 994, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.814521", "step": 994, "epoch": 1 }, { "type": "loss", "content": 0.028009014204144478, "timestamp": "2025-09-10 02:27:04.816387", "step": 995, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:04.845057", "step": 995, "epoch": 1 }, { "type": "loss", "content": 0.0059646558947861195, "timestamp": "2025-09-10 02:27:04.868182", "step": 996, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.897044", "step": 996, "epoch": 1 }, { "type": "loss", "content": 0.06651411205530167, "timestamp": "2025-09-10 02:27:04.898656", "step": 997, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.926954", "step": 997, "epoch": 1 }, { "type": "loss", "content": 0.006047925911843777, "timestamp": "2025-09-10 02:27:04.928879", "step": 998, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:04.957467", "step": 998, "epoch": 1 }, { "type": "loss", "content": 0.04073864221572876, "timestamp": "2025-09-10 02:27:04.959268", "step": 999, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:04.988198", "step": 999, "epoch": 1 }, { "type": "loss", "content": 0.02032596431672573, "timestamp": "2025-09-10 02:27:05.011403", "step": 1000, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 1000", "timestamp": "2025-09-10 02:27:09.409925", "step": 1000, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.446363", "step": 1000, "epoch": 1 }, { "type": "loss", "content": 0.03654515743255615, "timestamp": "2025-09-10 02:27:09.448213", "step": 1001, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.477474", "step": 1001, "epoch": 1 }, { "type": "loss", "content": 0.047440074384212494, "timestamp": "2025-09-10 02:27:09.479215", "step": 1002, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.507798", "step": 1002, "epoch": 1 }, { "type": "loss", "content": 0.04047483578324318, "timestamp": "2025-09-10 02:27:09.509390", "step": 1003, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.538484", "step": 1003, "epoch": 1 }, { "type": "loss", "content": 0.03700224682688713, "timestamp": "2025-09-10 02:27:09.561972", "step": 1004, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.590498", "step": 1004, "epoch": 1 }, { "type": "loss", "content": 0.04263563081622124, "timestamp": "2025-09-10 02:27:09.592363", "step": 1005, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.620779", "step": 1005, "epoch": 1 }, { "type": "loss", "content": 0.035001423209905624, "timestamp": "2025-09-10 02:27:09.622665", "step": 1006, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.651589", "step": 1006, "epoch": 1 }, { "type": "loss", "content": 0.013720368035137653, "timestamp": "2025-09-10 02:27:09.653375", "step": 1007, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.682237", "step": 1007, "epoch": 1 }, { "type": "loss", "content": 0.024205489084124565, "timestamp": "2025-09-10 02:27:09.705612", "step": 1008, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.733682", "step": 1008, "epoch": 1 }, { "type": "loss", "content": 0.043926943093538284, "timestamp": "2025-09-10 02:27:09.735479", "step": 1009, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.764005", "step": 1009, "epoch": 1 }, { "type": "loss", "content": 0.04182643070816994, "timestamp": "2025-09-10 02:27:09.765809", "step": 1010, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.793970", "step": 1010, "epoch": 1 }, { "type": "loss", "content": 0.034336067736148834, "timestamp": "2025-09-10 02:27:09.795569", "step": 1011, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.824126", "step": 1011, "epoch": 1 }, { "type": "loss", "content": 0.026709143072366714, "timestamp": "2025-09-10 02:27:09.847448", "step": 1012, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.875869", "step": 1012, "epoch": 1 }, { "type": "loss", "content": 0.04808344691991806, "timestamp": "2025-09-10 02:27:09.878106", "step": 1013, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:09.907276", "step": 1013, "epoch": 1 }, { "type": "loss", "content": 0.028084395453333855, "timestamp": "2025-09-10 02:27:09.908860", "step": 1014, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.937127", "step": 1014, "epoch": 1 }, { "type": "loss", "content": 0.028582213446497917, "timestamp": "2025-09-10 02:27:09.938786", "step": 1015, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:09.967034", "step": 1015, "epoch": 1 }, { "type": "loss", "content": 0.04544654116034508, "timestamp": "2025-09-10 02:27:09.990038", "step": 1016, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:10.018453", "step": 1016, "epoch": 1 }, { "type": "loss", "content": 0.07535956799983978, "timestamp": "2025-09-10 02:27:10.020201", "step": 1017, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.048706", "step": 1017, "epoch": 1 }, { "type": "loss", "content": 0.023995866999030113, "timestamp": "2025-09-10 02:27:10.050522", "step": 1018, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.078807", "step": 1018, "epoch": 1 }, { "type": "loss", "content": 0.017657050862908363, "timestamp": "2025-09-10 02:27:10.080466", "step": 1019, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:10.108715", "step": 1019, "epoch": 1 }, { "type": "loss", "content": 0.028995100408792496, "timestamp": "2025-09-10 02:27:10.131780", "step": 1020, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.160448", "step": 1020, "epoch": 1 }, { "type": "loss", "content": 0.020763801410794258, "timestamp": "2025-09-10 02:27:10.162146", "step": 1021, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.190624", "step": 1021, "epoch": 1 }, { "type": "loss", "content": 0.0394793376326561, "timestamp": "2025-09-10 02:27:10.192285", "step": 1022, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.221645", "step": 1022, "epoch": 1 }, { "type": "loss", "content": 0.0334097184240818, "timestamp": "2025-09-10 02:27:10.223224", "step": 1023, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.252436", "step": 1023, "epoch": 1 }, { "type": "loss", "content": 0.021310362964868546, "timestamp": "2025-09-10 02:27:10.276281", "step": 1024, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.305614", "step": 1024, "epoch": 1 }, { "type": "loss", "content": 0.028816591948270798, "timestamp": "2025-09-10 02:27:10.307429", "step": 1025, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.336526", "step": 1025, "epoch": 1 }, { "type": "loss", "content": 0.03184013068675995, "timestamp": "2025-09-10 02:27:10.338348", "step": 1026, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:10.367897", "step": 1026, "epoch": 1 }, { "type": "loss", "content": 0.024549782276153564, "timestamp": "2025-09-10 02:27:10.369674", "step": 1027, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.398144", "step": 1027, "epoch": 1 }, { "type": "loss", "content": 0.020799925550818443, "timestamp": "2025-09-10 02:27:10.421521", "step": 1028, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.450482", "step": 1028, "epoch": 1 }, { "type": "loss", "content": 0.02477448806166649, "timestamp": "2025-09-10 02:27:10.452148", "step": 1029, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.480617", "step": 1029, "epoch": 1 }, { "type": "loss", "content": 0.0107790632173419, "timestamp": "2025-09-10 02:27:10.482491", "step": 1030, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.511187", "step": 1030, "epoch": 1 }, { "type": "loss", "content": 0.02755180187523365, "timestamp": "2025-09-10 02:27:10.512835", "step": 1031, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.541499", "step": 1031, "epoch": 1 }, { "type": "loss", "content": 0.011883458122611046, "timestamp": "2025-09-10 02:27:10.564836", "step": 1032, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:10.593478", "step": 1032, "epoch": 1 }, { "type": "loss", "content": 0.050424523651599884, "timestamp": "2025-09-10 02:27:10.595216", "step": 1033, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.624006", "step": 1033, "epoch": 1 }, { "type": "loss", "content": 0.03125917539000511, "timestamp": "2025-09-10 02:27:10.625942", "step": 1034, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.654356", "step": 1034, "epoch": 1 }, { "type": "loss", "content": 0.028955284506082535, "timestamp": "2025-09-10 02:27:10.656204", "step": 1035, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.684559", "step": 1035, "epoch": 1 }, { "type": "loss", "content": 0.010720565915107727, "timestamp": "2025-09-10 02:27:10.707786", "step": 1036, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.736276", "step": 1036, "epoch": 1 }, { "type": "loss", "content": 0.05103132128715515, "timestamp": "2025-09-10 02:27:10.738069", "step": 1037, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:10.766381", "step": 1037, "epoch": 1 }, { "type": "loss", "content": 0.03147753328084946, "timestamp": "2025-09-10 02:27:10.768215", "step": 1038, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.796896", "step": 1038, "epoch": 1 }, { "type": "loss", "content": 0.03331483528017998, "timestamp": "2025-09-10 02:27:10.798500", "step": 1039, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.827959", "step": 1039, "epoch": 1 }, { "type": "loss", "content": 0.03740553930401802, "timestamp": "2025-09-10 02:27:10.850967", "step": 1040, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.879672", "step": 1040, "epoch": 1 }, { "type": "loss", "content": 0.0681772381067276, "timestamp": "2025-09-10 02:27:10.881578", "step": 1041, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:10.910105", "step": 1041, "epoch": 1 }, { "type": "loss", "content": 0.048164743930101395, "timestamp": "2025-09-10 02:27:10.911982", "step": 1042, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.940791", "step": 1042, "epoch": 1 }, { "type": "loss", "content": 0.0528777651488781, "timestamp": "2025-09-10 02:27:10.945610", "step": 1043, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:10.974158", "step": 1043, "epoch": 1 }, { "type": "loss", "content": 0.01392888743430376, "timestamp": "2025-09-10 02:27:10.997352", "step": 1044, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.026347", "step": 1044, "epoch": 1 }, { "type": "loss", "content": 0.02998768724501133, "timestamp": "2025-09-10 02:27:11.028158", "step": 1045, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.057119", "step": 1045, "epoch": 1 }, { "type": "loss", "content": 0.01582830585539341, "timestamp": "2025-09-10 02:27:11.058773", "step": 1046, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.086986", "step": 1046, "epoch": 1 }, { "type": "loss", "content": 0.01603604108095169, "timestamp": "2025-09-10 02:27:11.088891", "step": 1047, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.117302", "step": 1047, "epoch": 1 }, { "type": "loss", "content": 0.049586307257413864, "timestamp": "2025-09-10 02:27:11.140361", "step": 1048, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.168794", "step": 1048, "epoch": 1 }, { "type": "loss", "content": 0.03988540172576904, "timestamp": "2025-09-10 02:27:11.170612", "step": 1049, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.199250", "step": 1049, "epoch": 1 }, { "type": "loss", "content": 0.016875585541129112, "timestamp": "2025-09-10 02:27:11.201082", "step": 1050, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:11.229675", "step": 1050, "epoch": 1 }, { "type": "loss", "content": 0.041746437549591064, "timestamp": "2025-09-10 02:27:11.231377", "step": 1051, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.260020", "step": 1051, "epoch": 1 }, { "type": "loss", "content": 0.04784726724028587, "timestamp": "2025-09-10 02:27:11.284225", "step": 1052, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.312594", "step": 1052, "epoch": 1 }, { "type": "loss", "content": 0.016732031479477882, "timestamp": "2025-09-10 02:27:11.314192", "step": 1053, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:11.343012", "step": 1053, "epoch": 1 }, { "type": "loss", "content": 0.020166723057627678, "timestamp": "2025-09-10 02:27:11.344822", "step": 1054, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.373705", "step": 1054, "epoch": 1 }, { "type": "loss", "content": 0.08511082828044891, "timestamp": "2025-09-10 02:27:11.375430", "step": 1055, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.405111", "step": 1055, "epoch": 1 }, { "type": "loss", "content": 0.017505383118987083, "timestamp": "2025-09-10 02:27:11.428428", "step": 1056, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.457317", "step": 1056, "epoch": 1 }, { "type": "loss", "content": 0.03421511873602867, "timestamp": "2025-09-10 02:27:11.459201", "step": 1057, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.487782", "step": 1057, "epoch": 1 }, { "type": "loss", "content": 0.05192688852548599, "timestamp": "2025-09-10 02:27:11.489481", "step": 1058, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.518000", "step": 1058, "epoch": 1 }, { "type": "loss", "content": 0.0304196048527956, "timestamp": "2025-09-10 02:27:11.519805", "step": 1059, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.548572", "step": 1059, "epoch": 1 }, { "type": "loss", "content": 0.04589404910802841, "timestamp": "2025-09-10 02:27:11.571889", "step": 1060, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.600483", "step": 1060, "epoch": 1 }, { "type": "loss", "content": 0.03423125296831131, "timestamp": "2025-09-10 02:27:11.602102", "step": 1061, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.630912", "step": 1061, "epoch": 1 }, { "type": "loss", "content": 0.0522378571331501, "timestamp": "2025-09-10 02:27:11.632565", "step": 1062, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.660789", "step": 1062, "epoch": 1 }, { "type": "loss", "content": 0.061813417822122574, "timestamp": "2025-09-10 02:27:11.662410", "step": 1063, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:11.690942", "step": 1063, "epoch": 1 }, { "type": "loss", "content": 0.026628315448760986, "timestamp": "2025-09-10 02:27:11.714067", "step": 1064, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:27:13.561358", "step": 1064, "epoch": 1 }, { "type": "pplx", "content": 2371722.7542119753, "timestamp": "2025-09-10 02:27:13.563301", "step": 1064, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.590792", "step": 1064, "epoch": 1 }, { "type": "loss", "content": 0.03160058334469795, "timestamp": "2025-09-10 02:27:13.592574", "step": 1065, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.621158", "step": 1065, "epoch": 1 }, { "type": "loss", "content": 0.04315807297825813, "timestamp": "2025-09-10 02:27:13.623062", "step": 1066, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.651717", "step": 1066, "epoch": 1 }, { "type": "loss", "content": 0.01032306905835867, "timestamp": "2025-09-10 02:27:13.653395", "step": 1067, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.682021", "step": 1067, "epoch": 1 }, { "type": "loss", "content": 0.029713327065110207, "timestamp": "2025-09-10 02:27:13.705471", "step": 1068, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.734395", "step": 1068, "epoch": 1 }, { "type": "loss", "content": 0.001077897846698761, "timestamp": "2025-09-10 02:27:13.736210", "step": 1069, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.765096", "step": 1069, "epoch": 1 }, { "type": "loss", "content": 0.009043855592608452, "timestamp": "2025-09-10 02:27:13.766977", "step": 1070, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:13.796150", "step": 1070, "epoch": 1 }, { "type": "loss", "content": 0.025214748457074165, "timestamp": "2025-09-10 02:27:13.797693", "step": 1071, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.826536", "step": 1071, "epoch": 1 }, { "type": "loss", "content": 0.013453769497573376, "timestamp": "2025-09-10 02:27:13.849910", "step": 1072, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.879541", "step": 1072, "epoch": 1 }, { "type": "loss", "content": 0.03888639435172081, "timestamp": "2025-09-10 02:27:13.881380", "step": 1073, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.910223", "step": 1073, "epoch": 1 }, { "type": "loss", "content": 0.019313322380185127, "timestamp": "2025-09-10 02:27:13.912568", "step": 1074, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.941397", "step": 1074, "epoch": 1 }, { "type": "loss", "content": 0.0487765371799469, "timestamp": "2025-09-10 02:27:13.943313", "step": 1075, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:13.972421", "step": 1075, "epoch": 1 }, { "type": "loss", "content": 0.10099424421787262, "timestamp": "2025-09-10 02:27:13.995743", "step": 1076, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.024605", "step": 1076, "epoch": 1 }, { "type": "loss", "content": 0.018047673627734184, "timestamp": "2025-09-10 02:27:14.026385", "step": 1077, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.054914", "step": 1077, "epoch": 1 }, { "type": "loss", "content": 0.02523096464574337, "timestamp": "2025-09-10 02:27:14.056844", "step": 1078, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:14.086109", "step": 1078, "epoch": 1 }, { "type": "loss", "content": 0.021651145070791245, "timestamp": "2025-09-10 02:27:14.087975", "step": 1079, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.116540", "step": 1079, "epoch": 1 }, { "type": "loss", "content": 0.03117157705128193, "timestamp": "2025-09-10 02:27:14.139838", "step": 1080, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.168137", "step": 1080, "epoch": 1 }, { "type": "loss", "content": 0.012788075022399426, "timestamp": "2025-09-10 02:27:14.169866", "step": 1081, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.198437", "step": 1081, "epoch": 1 }, { "type": "loss", "content": 0.03557702153921127, "timestamp": "2025-09-10 02:27:14.200196", "step": 1082, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.228923", "step": 1082, "epoch": 1 }, { "type": "loss", "content": 0.015616429038345814, "timestamp": "2025-09-10 02:27:14.230662", "step": 1083, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.258895", "step": 1083, "epoch": 1 }, { "type": "loss", "content": 0.017235806211829185, "timestamp": "2025-09-10 02:27:14.282047", "step": 1084, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.310957", "step": 1084, "epoch": 1 }, { "type": "loss", "content": 0.023321127519011497, "timestamp": "2025-09-10 02:27:14.313119", "step": 1085, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:14.341204", "step": 1085, "epoch": 1 }, { "type": "loss", "content": 0.019345298409461975, "timestamp": "2025-09-10 02:27:14.343044", "step": 1086, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.371907", "step": 1086, "epoch": 1 }, { "type": "loss", "content": 0.014294320717453957, "timestamp": "2025-09-10 02:27:14.373753", "step": 1087, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.402377", "step": 1087, "epoch": 1 }, { "type": "loss", "content": 0.017413537949323654, "timestamp": "2025-09-10 02:27:14.425751", "step": 1088, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.454139", "step": 1088, "epoch": 1 }, { "type": "loss", "content": 0.010392394848167896, "timestamp": "2025-09-10 02:27:14.455815", "step": 1089, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.484133", "step": 1089, "epoch": 1 }, { "type": "loss", "content": 0.03324350342154503, "timestamp": "2025-09-10 02:27:14.485777", "step": 1090, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:14.514360", "step": 1090, "epoch": 1 }, { "type": "loss", "content": 0.0070959129370749, "timestamp": "2025-09-10 02:27:14.516005", "step": 1091, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.545060", "step": 1091, "epoch": 1 }, { "type": "loss", "content": 0.04243098571896553, "timestamp": "2025-09-10 02:27:14.568407", "step": 1092, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.597655", "step": 1092, "epoch": 1 }, { "type": "loss", "content": 0.025557631626725197, "timestamp": "2025-09-10 02:27:14.599503", "step": 1093, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.628391", "step": 1093, "epoch": 1 }, { "type": "loss", "content": 0.04200953245162964, "timestamp": "2025-09-10 02:27:14.630385", "step": 1094, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.659004", "step": 1094, "epoch": 1 }, { "type": "loss", "content": 0.05213457718491554, "timestamp": "2025-09-10 02:27:14.660982", "step": 1095, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.689571", "step": 1095, "epoch": 1 }, { "type": "loss", "content": 0.023358408361673355, "timestamp": "2025-09-10 02:27:14.712920", "step": 1096, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.741454", "step": 1096, "epoch": 1 }, { "type": "loss", "content": 0.013042804785072803, "timestamp": "2025-09-10 02:27:14.743258", "step": 1097, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:14.772438", "step": 1097, "epoch": 1 }, { "type": "loss", "content": 0.035700105130672455, "timestamp": "2025-09-10 02:27:14.774267", "step": 1098, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.803387", "step": 1098, "epoch": 1 }, { "type": "loss", "content": 0.08563186228275299, "timestamp": "2025-09-10 02:27:14.805300", "step": 1099, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.834341", "step": 1099, "epoch": 1 }, { "type": "loss", "content": 0.036948930472135544, "timestamp": "2025-09-10 02:27:14.857451", "step": 1100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.886200", "step": 1100, "epoch": 1 }, { "type": "loss", "content": 0.05217735096812248, "timestamp": "2025-09-10 02:27:14.888010", "step": 1101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.916899", "step": 1101, "epoch": 1 }, { "type": "loss", "content": 0.0390712209045887, "timestamp": "2025-09-10 02:27:14.918478", "step": 1102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:14.947274", "step": 1102, "epoch": 1 }, { "type": "loss", "content": 0.058155059814453125, "timestamp": "2025-09-10 02:27:14.948974", "step": 1103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:14.977620", "step": 1103, "epoch": 1 }, { "type": "loss", "content": 0.0360332615673542, "timestamp": "2025-09-10 02:27:15.000831", "step": 1104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:15.029488", "step": 1104, "epoch": 1 }, { "type": "loss", "content": 0.024571111425757408, "timestamp": "2025-09-10 02:27:15.031350", "step": 1105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:15.060326", "step": 1105, "epoch": 1 }, { "type": "loss", "content": 0.055816736072301865, "timestamp": "2025-09-10 02:27:15.061956", "step": 1106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.090646", "step": 1106, "epoch": 1 }, { "type": "loss", "content": 0.04789220914244652, "timestamp": "2025-09-10 02:27:15.092463", "step": 1107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:15.121016", "step": 1107, "epoch": 1 }, { "type": "loss", "content": 0.0074346489273011684, "timestamp": "2025-09-10 02:27:15.149730", "step": 1108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.178898", "step": 1108, "epoch": 1 }, { "type": "loss", "content": 0.015395489521324635, "timestamp": "2025-09-10 02:27:15.180827", "step": 1109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.209715", "step": 1109, "epoch": 1 }, { "type": "loss", "content": 0.018152425065636635, "timestamp": "2025-09-10 02:27:15.211581", "step": 1110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.240425", "step": 1110, "epoch": 1 }, { "type": "loss", "content": 0.045093268156051636, "timestamp": "2025-09-10 02:27:15.242189", "step": 1111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.271313", "step": 1111, "epoch": 1 }, { "type": "loss", "content": 0.029312655329704285, "timestamp": "2025-09-10 02:27:15.294467", "step": 1112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.322636", "step": 1112, "epoch": 1 }, { "type": "loss", "content": 0.016746576875448227, "timestamp": "2025-09-10 02:27:15.324478", "step": 1113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.353145", "step": 1113, "epoch": 1 }, { "type": "loss", "content": 0.038051072508096695, "timestamp": "2025-09-10 02:27:15.356001", "step": 1114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.384523", "step": 1114, "epoch": 1 }, { "type": "loss", "content": 0.02798115275800228, "timestamp": "2025-09-10 02:27:15.387102", "step": 1115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.415815", "step": 1115, "epoch": 1 }, { "type": "loss", "content": 0.01591372862458229, "timestamp": "2025-09-10 02:27:15.438921", "step": 1116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.467468", "step": 1116, "epoch": 1 }, { "type": "loss", "content": 0.021558169275522232, "timestamp": "2025-09-10 02:27:15.469338", "step": 1117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.497956", "step": 1117, "epoch": 1 }, { "type": "loss", "content": 0.01798681728541851, "timestamp": "2025-09-10 02:27:15.499582", "step": 1118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:15.532023", "step": 1118, "epoch": 1 }, { "type": "loss", "content": 0.03604728356003761, "timestamp": "2025-09-10 02:27:15.533881", "step": 1119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:15.562489", "step": 1119, "epoch": 1 }, { "type": "loss", "content": 0.052804116159677505, "timestamp": "2025-09-10 02:27:15.585704", "step": 1120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:15.614485", "step": 1120, "epoch": 1 }, { "type": "loss", "content": 0.06106474995613098, "timestamp": "2025-09-10 02:27:15.616366", "step": 1121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.645155", "step": 1121, "epoch": 1 }, { "type": "loss", "content": 0.050376616418361664, "timestamp": "2025-09-10 02:27:15.646873", "step": 1122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.675519", "step": 1122, "epoch": 1 }, { "type": "loss", "content": 0.028567716479301453, "timestamp": "2025-09-10 02:27:15.677583", "step": 1123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.706290", "step": 1123, "epoch": 1 }, { "type": "loss", "content": 0.017581339925527573, "timestamp": "2025-09-10 02:27:15.729644", "step": 1124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:15.758894", "step": 1124, "epoch": 1 }, { "type": "loss", "content": 0.06794560700654984, "timestamp": "2025-09-10 02:27:15.761800", "step": 1125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.791419", "step": 1125, "epoch": 1 }, { "type": "loss", "content": 0.023648962378501892, "timestamp": "2025-09-10 02:27:15.793250", "step": 1126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.823279", "step": 1126, "epoch": 1 }, { "type": "loss", "content": 0.014147087931632996, "timestamp": "2025-09-10 02:27:15.824878", "step": 1127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.855706", "step": 1127, "epoch": 1 }, { "type": "loss", "content": 0.02838747762143612, "timestamp": "2025-09-10 02:27:15.879147", "step": 1128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.908398", "step": 1128, "epoch": 1 }, { "type": "loss", "content": 0.024299990385770798, "timestamp": "2025-09-10 02:27:15.909995", "step": 1129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:15.940091", "step": 1129, "epoch": 1 }, { "type": "loss", "content": 0.040718890726566315, "timestamp": "2025-09-10 02:27:15.941956", "step": 1130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:15.970668", "step": 1130, "epoch": 1 }, { "type": "loss", "content": 0.026956811547279358, "timestamp": "2025-09-10 02:27:15.972597", "step": 1131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.001502", "step": 1131, "epoch": 1 }, { "type": "loss", "content": 0.020035987719893456, "timestamp": "2025-09-10 02:27:16.031771", "step": 1132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.062656", "step": 1132, "epoch": 1 }, { "type": "loss", "content": 0.020776256918907166, "timestamp": "2025-09-10 02:27:16.064529", "step": 1133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.093561", "step": 1133, "epoch": 1 }, { "type": "loss", "content": 0.03961186483502388, "timestamp": "2025-09-10 02:27:16.095439", "step": 1134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.124253", "step": 1134, "epoch": 1 }, { "type": "loss", "content": 0.03113470785319805, "timestamp": "2025-09-10 02:27:16.126083", "step": 1135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.154715", "step": 1135, "epoch": 1 }, { "type": "loss", "content": 0.05010887607932091, "timestamp": "2025-09-10 02:27:16.177947", "step": 1136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:16.206794", "step": 1136, "epoch": 1 }, { "type": "loss", "content": 0.028908152133226395, "timestamp": "2025-09-10 02:27:16.209886", "step": 1137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.242380", "step": 1137, "epoch": 1 }, { "type": "loss", "content": 0.043085407465696335, "timestamp": "2025-09-10 02:27:16.244242", "step": 1138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.273489", "step": 1138, "epoch": 1 }, { "type": "loss", "content": 0.017086556181311607, "timestamp": "2025-09-10 02:27:16.275283", "step": 1139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.304018", "step": 1139, "epoch": 1 }, { "type": "loss", "content": 0.04856061562895775, "timestamp": "2025-09-10 02:27:16.327420", "step": 1140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.356166", "step": 1140, "epoch": 1 }, { "type": "loss", "content": 0.03742309287190437, "timestamp": "2025-09-10 02:27:16.358018", "step": 1141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.386671", "step": 1141, "epoch": 1 }, { "type": "loss", "content": 0.05607035011053085, "timestamp": "2025-09-10 02:27:16.388548", "step": 1142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.416932", "step": 1142, "epoch": 1 }, { "type": "loss", "content": 0.019550606608390808, "timestamp": "2025-09-10 02:27:16.419157", "step": 1143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.447648", "step": 1143, "epoch": 1 }, { "type": "loss", "content": 0.02041657827794552, "timestamp": "2025-09-10 02:27:16.471015", "step": 1144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.499498", "step": 1144, "epoch": 1 }, { "type": "loss", "content": 0.008098459802567959, "timestamp": "2025-09-10 02:27:16.501178", "step": 1145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.529696", "step": 1145, "epoch": 1 }, { "type": "loss", "content": 0.015380357392132282, "timestamp": "2025-09-10 02:27:16.531550", "step": 1146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.560308", "step": 1146, "epoch": 1 }, { "type": "loss", "content": 0.02163703925907612, "timestamp": "2025-09-10 02:27:16.561944", "step": 1147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.590482", "step": 1147, "epoch": 1 }, { "type": "loss", "content": 0.035727955400943756, "timestamp": "2025-09-10 02:27:16.613924", "step": 1148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.642988", "step": 1148, "epoch": 1 }, { "type": "loss", "content": 0.009822634980082512, "timestamp": "2025-09-10 02:27:16.644561", "step": 1149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.672969", "step": 1149, "epoch": 1 }, { "type": "loss", "content": 0.005142558831721544, "timestamp": "2025-09-10 02:27:16.674810", "step": 1150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.703610", "step": 1150, "epoch": 1 }, { "type": "loss", "content": 0.03525710478425026, "timestamp": "2025-09-10 02:27:16.705199", "step": 1151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.733783", "step": 1151, "epoch": 1 }, { "type": "loss", "content": 0.0724150687456131, "timestamp": "2025-09-10 02:27:16.757345", "step": 1152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.786786", "step": 1152, "epoch": 1 }, { "type": "loss", "content": 0.06361201405525208, "timestamp": "2025-09-10 02:27:16.788835", "step": 1153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.817404", "step": 1153, "epoch": 1 }, { "type": "loss", "content": 0.06277791410684586, "timestamp": "2025-09-10 02:27:16.819775", "step": 1154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.849183", "step": 1154, "epoch": 1 }, { "type": "loss", "content": 0.06249118596315384, "timestamp": "2025-09-10 02:27:16.851034", "step": 1155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.879697", "step": 1155, "epoch": 1 }, { "type": "loss", "content": 0.023662757128477097, "timestamp": "2025-09-10 02:27:16.903181", "step": 1156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.932319", "step": 1156, "epoch": 1 }, { "type": "loss", "content": 0.028213325887918472, "timestamp": "2025-09-10 02:27:16.934216", "step": 1157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:16.962962", "step": 1157, "epoch": 1 }, { "type": "loss", "content": 0.04014962911605835, "timestamp": "2025-09-10 02:27:16.964595", "step": 1158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:16.993083", "step": 1158, "epoch": 1 }, { "type": "loss", "content": 0.019419433549046516, "timestamp": "2025-09-10 02:27:16.994968", "step": 1159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:17.023560", "step": 1159, "epoch": 1 }, { "type": "loss", "content": 0.012511305510997772, "timestamp": "2025-09-10 02:27:17.046738", "step": 1160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.075291", "step": 1160, "epoch": 1 }, { "type": "loss", "content": 0.04608547315001488, "timestamp": "2025-09-10 02:27:17.076989", "step": 1161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.105956", "step": 1161, "epoch": 1 }, { "type": "loss", "content": 0.0226228516548872, "timestamp": "2025-09-10 02:27:17.107671", "step": 1162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.136318", "step": 1162, "epoch": 1 }, { "type": "loss", "content": 0.010072730481624603, "timestamp": "2025-09-10 02:27:17.138082", "step": 1163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.166945", "step": 1163, "epoch": 1 }, { "type": "loss", "content": 0.011195765808224678, "timestamp": "2025-09-10 02:27:17.190368", "step": 1164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:17.220006", "step": 1164, "epoch": 1 }, { "type": "loss", "content": 0.015673991292715073, "timestamp": "2025-09-10 02:27:17.222011", "step": 1165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.250591", "step": 1165, "epoch": 1 }, { "type": "loss", "content": 0.033464204519987106, "timestamp": "2025-09-10 02:27:17.252615", "step": 1166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:17.281724", "step": 1166, "epoch": 1 }, { "type": "loss", "content": 0.022387336939573288, "timestamp": "2025-09-10 02:27:17.283920", "step": 1167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.312436", "step": 1167, "epoch": 1 }, { "type": "loss", "content": 0.0325801745057106, "timestamp": "2025-09-10 02:27:17.335682", "step": 1168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:17.364525", "step": 1168, "epoch": 1 }, { "type": "loss", "content": 0.06113835796713829, "timestamp": "2025-09-10 02:27:17.366450", "step": 1169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.395501", "step": 1169, "epoch": 1 }, { "type": "loss", "content": 0.011920975521206856, "timestamp": "2025-09-10 02:27:17.397258", "step": 1170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.426245", "step": 1170, "epoch": 1 }, { "type": "loss", "content": 0.021274421364068985, "timestamp": "2025-09-10 02:27:17.428356", "step": 1171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.457932", "step": 1171, "epoch": 1 }, { "type": "loss", "content": 0.04234448820352554, "timestamp": "2025-09-10 02:27:17.481314", "step": 1172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.510844", "step": 1172, "epoch": 1 }, { "type": "loss", "content": 0.027071913704276085, "timestamp": "2025-09-10 02:27:17.512722", "step": 1173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.541342", "step": 1173, "epoch": 1 }, { "type": "loss", "content": 0.013425322249531746, "timestamp": "2025-09-10 02:27:17.542903", "step": 1174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.570976", "step": 1174, "epoch": 1 }, { "type": "loss", "content": 0.007693841587752104, "timestamp": "2025-09-10 02:27:17.573902", "step": 1175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.606463", "step": 1175, "epoch": 1 }, { "type": "loss", "content": 0.027679383754730225, "timestamp": "2025-09-10 02:27:17.629475", "step": 1176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.658064", "step": 1176, "epoch": 1 }, { "type": "loss", "content": 0.05865621566772461, "timestamp": "2025-09-10 02:27:17.661861", "step": 1177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:17.690721", "step": 1177, "epoch": 1 }, { "type": "loss", "content": 0.015808407217264175, "timestamp": "2025-09-10 02:27:17.695697", "step": 1178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:17.724286", "step": 1178, "epoch": 1 }, { "type": "loss", "content": 0.05676742270588875, "timestamp": "2025-09-10 02:27:17.725981", "step": 1179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.754519", "step": 1179, "epoch": 1 }, { "type": "loss", "content": 0.039605528116226196, "timestamp": "2025-09-10 02:27:17.777900", "step": 1180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.809571", "step": 1180, "epoch": 1 }, { "type": "loss", "content": 0.023134754970669746, "timestamp": "2025-09-10 02:27:17.811603", "step": 1181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.840672", "step": 1181, "epoch": 1 }, { "type": "loss", "content": 0.019693270325660706, "timestamp": "2025-09-10 02:27:17.845192", "step": 1182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.874333", "step": 1182, "epoch": 1 }, { "type": "loss", "content": 0.014443730004131794, "timestamp": "2025-09-10 02:27:17.877418", "step": 1183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.908136", "step": 1183, "epoch": 1 }, { "type": "loss", "content": 0.06374738365411758, "timestamp": "2025-09-10 02:27:17.932824", "step": 1184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.961265", "step": 1184, "epoch": 1 }, { "type": "loss", "content": 0.06489721685647964, "timestamp": "2025-09-10 02:27:17.963135", "step": 1185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:17.991928", "step": 1185, "epoch": 1 }, { "type": "loss", "content": 0.02132502570748329, "timestamp": "2025-09-10 02:27:17.995007", "step": 1186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.023920", "step": 1186, "epoch": 1 }, { "type": "loss", "content": 0.045552726835012436, "timestamp": "2025-09-10 02:27:18.025883", "step": 1187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.072998", "step": 1187, "epoch": 1 }, { "type": "loss", "content": 0.025895103812217712, "timestamp": "2025-09-10 02:27:18.096191", "step": 1188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.130616", "step": 1188, "epoch": 1 }, { "type": "loss", "content": 0.042253535240888596, "timestamp": "2025-09-10 02:27:18.132789", "step": 1189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.161157", "step": 1189, "epoch": 1 }, { "type": "loss", "content": 0.0021609091199934483, "timestamp": "2025-09-10 02:27:18.162981", "step": 1190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.191456", "step": 1190, "epoch": 1 }, { "type": "loss", "content": 0.051309734582901, "timestamp": "2025-09-10 02:27:18.193300", "step": 1191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.221795", "step": 1191, "epoch": 1 }, { "type": "loss", "content": 0.027675259858369827, "timestamp": "2025-09-10 02:27:18.248515", "step": 1192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.277368", "step": 1192, "epoch": 1 }, { "type": "loss", "content": 0.031873274594545364, "timestamp": "2025-09-10 02:27:18.279103", "step": 1193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:18.308468", "step": 1193, "epoch": 1 }, { "type": "loss", "content": 0.05416679382324219, "timestamp": "2025-09-10 02:27:18.310571", "step": 1194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:18.344175", "step": 1194, "epoch": 1 }, { "type": "loss", "content": 0.06558862328529358, "timestamp": "2025-09-10 02:27:18.346231", "step": 1195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.376459", "step": 1195, "epoch": 1 }, { "type": "loss", "content": 0.03938909247517586, "timestamp": "2025-09-10 02:27:18.399756", "step": 1196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.429267", "step": 1196, "epoch": 1 }, { "type": "loss", "content": 0.06032661348581314, "timestamp": "2025-09-10 02:27:18.431733", "step": 1197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:18.460381", "step": 1197, "epoch": 1 }, { "type": "loss", "content": 0.06398675590753555, "timestamp": "2025-09-10 02:27:18.466894", "step": 1198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.501425", "step": 1198, "epoch": 1 }, { "type": "loss", "content": 0.009640135802328587, "timestamp": "2025-09-10 02:27:18.502991", "step": 1199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.531851", "step": 1199, "epoch": 1 }, { "type": "loss", "content": 0.018345508724451065, "timestamp": "2025-09-10 02:27:18.554961", "step": 1200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.584382", "step": 1200, "epoch": 1 }, { "type": "loss", "content": 0.08645867556333542, "timestamp": "2025-09-10 02:27:18.586281", "step": 1201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.615871", "step": 1201, "epoch": 1 }, { "type": "loss", "content": 0.022902602329850197, "timestamp": "2025-09-10 02:27:18.617826", "step": 1202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.647165", "step": 1202, "epoch": 1 }, { "type": "loss", "content": 0.033925969153642654, "timestamp": "2025-09-10 02:27:18.648810", "step": 1203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.678337", "step": 1203, "epoch": 1 }, { "type": "loss", "content": 0.04151107743382454, "timestamp": "2025-09-10 02:27:18.701792", "step": 1204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.731573", "step": 1204, "epoch": 1 }, { "type": "loss", "content": 0.04618053510785103, "timestamp": "2025-09-10 02:27:18.733370", "step": 1205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.762981", "step": 1205, "epoch": 1 }, { "type": "loss", "content": 0.025517631322145462, "timestamp": "2025-09-10 02:27:18.764945", "step": 1206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:18.794351", "step": 1206, "epoch": 1 }, { "type": "loss", "content": 0.007069730665534735, "timestamp": "2025-09-10 02:27:18.797417", "step": 1207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.828343", "step": 1207, "epoch": 1 }, { "type": "loss", "content": 0.0422409251332283, "timestamp": "2025-09-10 02:27:18.851593", "step": 1208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.880443", "step": 1208, "epoch": 1 }, { "type": "loss", "content": 0.0322219543159008, "timestamp": "2025-09-10 02:27:18.882417", "step": 1209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.911728", "step": 1209, "epoch": 1 }, { "type": "loss", "content": 0.03617298603057861, "timestamp": "2025-09-10 02:27:18.913665", "step": 1210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.942380", "step": 1210, "epoch": 1 }, { "type": "loss", "content": 0.03871258348226547, "timestamp": "2025-09-10 02:27:18.944204", "step": 1211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:18.973455", "step": 1211, "epoch": 1 }, { "type": "loss", "content": 0.025040697306394577, "timestamp": "2025-09-10 02:27:18.996689", "step": 1212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:19.025611", "step": 1212, "epoch": 1 }, { "type": "loss", "content": 0.023504246026277542, "timestamp": "2025-09-10 02:27:19.027880", "step": 1213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:19.056355", "step": 1213, "epoch": 1 }, { "type": "loss", "content": 0.05998089909553528, "timestamp": "2025-09-10 02:27:19.058082", "step": 1214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:19.086744", "step": 1214, "epoch": 1 }, { "type": "loss", "content": 0.030563218519091606, "timestamp": "2025-09-10 02:27:19.088884", "step": 1215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:19.117472", "step": 1215, "epoch": 1 }, { "type": "loss", "content": 0.03772884979844093, "timestamp": "2025-09-10 02:27:19.140764", "step": 1216, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:27:21.037220", "step": 1216, "epoch": 1 }, { "type": "pplx", "content": 2289026.425039921, "timestamp": "2025-09-10 02:27:21.039105", "step": 1216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.066171", "step": 1216, "epoch": 1 }, { "type": "loss", "content": 0.026032647117972374, "timestamp": "2025-09-10 02:27:21.068040", "step": 1217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.097122", "step": 1217, "epoch": 1 }, { "type": "loss", "content": 0.04040813446044922, "timestamp": "2025-09-10 02:27:21.099051", "step": 1218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:21.127652", "step": 1218, "epoch": 1 }, { "type": "loss", "content": 0.02689814381301403, "timestamp": "2025-09-10 02:27:21.129663", "step": 1219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.158619", "step": 1219, "epoch": 1 }, { "type": "loss", "content": 0.02480989508330822, "timestamp": "2025-09-10 02:27:21.182128", "step": 1220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.210985", "step": 1220, "epoch": 1 }, { "type": "loss", "content": 0.03290778771042824, "timestamp": "2025-09-10 02:27:21.212803", "step": 1221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.241736", "step": 1221, "epoch": 1 }, { "type": "loss", "content": 0.056918807327747345, "timestamp": "2025-09-10 02:27:21.243313", "step": 1222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.271979", "step": 1222, "epoch": 1 }, { "type": "loss", "content": 0.035863909870386124, "timestamp": "2025-09-10 02:27:21.273869", "step": 1223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:21.302495", "step": 1223, "epoch": 1 }, { "type": "loss", "content": 0.026448115706443787, "timestamp": "2025-09-10 02:27:21.325703", "step": 1224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.354467", "step": 1224, "epoch": 1 }, { "type": "loss", "content": 0.0297726821154356, "timestamp": "2025-09-10 02:27:21.356185", "step": 1225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.385415", "step": 1225, "epoch": 1 }, { "type": "loss", "content": 0.03371063619852066, "timestamp": "2025-09-10 02:27:21.387182", "step": 1226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.415395", "step": 1226, "epoch": 1 }, { "type": "loss", "content": 0.0271589495241642, "timestamp": "2025-09-10 02:27:21.417243", "step": 1227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.445695", "step": 1227, "epoch": 1 }, { "type": "loss", "content": 0.03135223314166069, "timestamp": "2025-09-10 02:27:21.468981", "step": 1228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.497808", "step": 1228, "epoch": 1 }, { "type": "loss", "content": 0.0368199348449707, "timestamp": "2025-09-10 02:27:21.499522", "step": 1229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.528536", "step": 1229, "epoch": 1 }, { "type": "loss", "content": 0.025787577033042908, "timestamp": "2025-09-10 02:27:21.530851", "step": 1230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.559139", "step": 1230, "epoch": 1 }, { "type": "loss", "content": 0.04452862963080406, "timestamp": "2025-09-10 02:27:21.560909", "step": 1231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.589342", "step": 1231, "epoch": 1 }, { "type": "loss", "content": 0.04641091451048851, "timestamp": "2025-09-10 02:27:21.612489", "step": 1232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:21.641342", "step": 1232, "epoch": 1 }, { "type": "loss", "content": 0.06339367479085922, "timestamp": "2025-09-10 02:27:21.643099", "step": 1233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.671645", "step": 1233, "epoch": 1 }, { "type": "loss", "content": 0.012783932499587536, "timestamp": "2025-09-10 02:27:21.673567", "step": 1234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.702110", "step": 1234, "epoch": 1 }, { "type": "loss", "content": 0.005669086240231991, "timestamp": "2025-09-10 02:27:21.703830", "step": 1235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.732556", "step": 1235, "epoch": 1 }, { "type": "loss", "content": 0.02278485894203186, "timestamp": "2025-09-10 02:27:21.755592", "step": 1236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:21.784522", "step": 1236, "epoch": 1 }, { "type": "loss", "content": 0.029380736872553825, "timestamp": "2025-09-10 02:27:21.786524", "step": 1237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.815346", "step": 1237, "epoch": 1 }, { "type": "loss", "content": 0.05142504349350929, "timestamp": "2025-09-10 02:27:21.817031", "step": 1238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:21.845793", "step": 1238, "epoch": 1 }, { "type": "loss", "content": 0.039533525705337524, "timestamp": "2025-09-10 02:27:21.847726", "step": 1239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.876616", "step": 1239, "epoch": 1 }, { "type": "loss", "content": 0.03197439759969711, "timestamp": "2025-09-10 02:27:21.899708", "step": 1240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.928794", "step": 1240, "epoch": 1 }, { "type": "loss", "content": 0.05023592710494995, "timestamp": "2025-09-10 02:27:21.930686", "step": 1241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.960377", "step": 1241, "epoch": 1 }, { "type": "loss", "content": 0.04292667284607887, "timestamp": "2025-09-10 02:27:21.962103", "step": 1242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:21.990690", "step": 1242, "epoch": 1 }, { "type": "loss", "content": 0.059419870376586914, "timestamp": "2025-09-10 02:27:21.992499", "step": 1243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.021517", "step": 1243, "epoch": 1 }, { "type": "loss", "content": 0.03515687957406044, "timestamp": "2025-09-10 02:27:22.044653", "step": 1244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.073481", "step": 1244, "epoch": 1 }, { "type": "loss", "content": 0.04057113081216812, "timestamp": "2025-09-10 02:27:22.075201", "step": 1245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.103971", "step": 1245, "epoch": 1 }, { "type": "loss", "content": 0.057592350989580154, "timestamp": "2025-09-10 02:27:22.106006", "step": 1246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:22.135112", "step": 1246, "epoch": 1 }, { "type": "loss", "content": 0.04547825828194618, "timestamp": "2025-09-10 02:27:22.136826", "step": 1247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:22.165294", "step": 1247, "epoch": 1 }, { "type": "loss", "content": 0.03685108572244644, "timestamp": "2025-09-10 02:27:22.188846", "step": 1248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:22.217662", "step": 1248, "epoch": 1 }, { "type": "loss", "content": 0.046049814671278, "timestamp": "2025-09-10 02:27:22.219523", "step": 1249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.248592", "step": 1249, "epoch": 1 }, { "type": "loss", "content": 0.03411615639925003, "timestamp": "2025-09-10 02:27:22.250463", "step": 1250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.279405", "step": 1250, "epoch": 1 }, { "type": "loss", "content": 0.036534737795591354, "timestamp": "2025-09-10 02:27:22.281283", "step": 1251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.309914", "step": 1251, "epoch": 1 }, { "type": "loss", "content": 0.060973696410655975, "timestamp": "2025-09-10 02:27:22.333292", "step": 1252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.361797", "step": 1252, "epoch": 1 }, { "type": "loss", "content": 0.029213469475507736, "timestamp": "2025-09-10 02:27:22.363470", "step": 1253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.392225", "step": 1253, "epoch": 1 }, { "type": "loss", "content": 0.055876441299915314, "timestamp": "2025-09-10 02:27:22.393797", "step": 1254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.422080", "step": 1254, "epoch": 1 }, { "type": "loss", "content": 0.035937659442424774, "timestamp": "2025-09-10 02:27:22.423958", "step": 1255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.452559", "step": 1255, "epoch": 1 }, { "type": "loss", "content": 0.06300339847803116, "timestamp": "2025-09-10 02:27:22.475653", "step": 1256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.504634", "step": 1256, "epoch": 1 }, { "type": "loss", "content": 0.06337703764438629, "timestamp": "2025-09-10 02:27:22.506290", "step": 1257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.535092", "step": 1257, "epoch": 1 }, { "type": "loss", "content": 0.02344011329114437, "timestamp": "2025-09-10 02:27:22.536912", "step": 1258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.565690", "step": 1258, "epoch": 1 }, { "type": "loss", "content": 0.02030993066728115, "timestamp": "2025-09-10 02:27:22.567405", "step": 1259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.596200", "step": 1259, "epoch": 1 }, { "type": "loss", "content": 0.07915498316287994, "timestamp": "2025-09-10 02:27:22.619572", "step": 1260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.648198", "step": 1260, "epoch": 1 }, { "type": "loss", "content": 0.03038150444626808, "timestamp": "2025-09-10 02:27:22.649817", "step": 1261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:22.678375", "step": 1261, "epoch": 1 }, { "type": "loss", "content": 0.02446349337697029, "timestamp": "2025-09-10 02:27:22.680206", "step": 1262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.708899", "step": 1262, "epoch": 1 }, { "type": "loss", "content": 0.03944491222500801, "timestamp": "2025-09-10 02:27:22.710716", "step": 1263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.739464", "step": 1263, "epoch": 1 }, { "type": "loss", "content": 0.04451688751578331, "timestamp": "2025-09-10 02:27:22.762678", "step": 1264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.791541", "step": 1264, "epoch": 1 }, { "type": "loss", "content": 0.023774994537234306, "timestamp": "2025-09-10 02:27:22.793290", "step": 1265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.822023", "step": 1265, "epoch": 1 }, { "type": "loss", "content": 0.06238982081413269, "timestamp": "2025-09-10 02:27:22.823837", "step": 1266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:22.852597", "step": 1266, "epoch": 1 }, { "type": "loss", "content": 0.01891942135989666, "timestamp": "2025-09-10 02:27:22.854419", "step": 1267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.883347", "step": 1267, "epoch": 1 }, { "type": "loss", "content": 0.016256926581263542, "timestamp": "2025-09-10 02:27:22.906695", "step": 1268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.935439", "step": 1268, "epoch": 1 }, { "type": "loss", "content": 0.020205358043313026, "timestamp": "2025-09-10 02:27:22.937064", "step": 1269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:22.966635", "step": 1269, "epoch": 1 }, { "type": "loss", "content": 0.04541391506791115, "timestamp": "2025-09-10 02:27:22.972446", "step": 1270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.011426", "step": 1270, "epoch": 1 }, { "type": "loss", "content": 0.02258029393851757, "timestamp": "2025-09-10 02:27:23.013224", "step": 1271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.042267", "step": 1271, "epoch": 1 }, { "type": "loss", "content": 0.0459962822496891, "timestamp": "2025-09-10 02:27:23.065373", "step": 1272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.098320", "step": 1272, "epoch": 1 }, { "type": "loss", "content": 0.04839509725570679, "timestamp": "2025-09-10 02:27:23.100118", "step": 1273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.128600", "step": 1273, "epoch": 1 }, { "type": "loss", "content": 0.022308675572276115, "timestamp": "2025-09-10 02:27:23.136317", "step": 1274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:23.168579", "step": 1274, "epoch": 1 }, { "type": "loss", "content": 0.008223294280469418, "timestamp": "2025-09-10 02:27:23.170457", "step": 1275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.199084", "step": 1275, "epoch": 1 }, { "type": "loss", "content": 0.027793053537607193, "timestamp": "2025-09-10 02:27:23.222516", "step": 1276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.253899", "step": 1276, "epoch": 1 }, { "type": "loss", "content": 0.01381795946508646, "timestamp": "2025-09-10 02:27:23.255913", "step": 1277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.284641", "step": 1277, "epoch": 1 }, { "type": "loss", "content": 0.06890726834535599, "timestamp": "2025-09-10 02:27:23.286567", "step": 1278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.315360", "step": 1278, "epoch": 1 }, { "type": "loss", "content": 0.03059798665344715, "timestamp": "2025-09-10 02:27:23.316893", "step": 1279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.345475", "step": 1279, "epoch": 1 }, { "type": "loss", "content": 0.044452693313360214, "timestamp": "2025-09-10 02:27:23.368696", "step": 1280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:23.401098", "step": 1280, "epoch": 1 }, { "type": "loss", "content": 0.017186051234602928, "timestamp": "2025-09-10 02:27:23.403073", "step": 1281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.436685", "step": 1281, "epoch": 1 }, { "type": "loss", "content": 0.0315559059381485, "timestamp": "2025-09-10 02:27:23.440055", "step": 1282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:23.474771", "step": 1282, "epoch": 1 }, { "type": "loss", "content": 0.036582719534635544, "timestamp": "2025-09-10 02:27:23.476604", "step": 1283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:23.504947", "step": 1283, "epoch": 1 }, { "type": "loss", "content": 0.03221450373530388, "timestamp": "2025-09-10 02:27:23.528083", "step": 1284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.558025", "step": 1284, "epoch": 1 }, { "type": "loss", "content": 0.033226583153009415, "timestamp": "2025-09-10 02:27:23.562953", "step": 1285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.592128", "step": 1285, "epoch": 1 }, { "type": "loss", "content": 0.04618069529533386, "timestamp": "2025-09-10 02:27:23.594036", "step": 1286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.624690", "step": 1286, "epoch": 1 }, { "type": "loss", "content": 0.01848338171839714, "timestamp": "2025-09-10 02:27:23.626551", "step": 1287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.655198", "step": 1287, "epoch": 1 }, { "type": "loss", "content": 0.0044882288202643394, "timestamp": "2025-09-10 02:27:23.678347", "step": 1288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.707114", "step": 1288, "epoch": 1 }, { "type": "loss", "content": 0.04217400401830673, "timestamp": "2025-09-10 02:27:23.708792", "step": 1289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:23.741209", "step": 1289, "epoch": 1 }, { "type": "loss", "content": 0.0670870840549469, "timestamp": "2025-09-10 02:27:23.743259", "step": 1290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:23.772245", "step": 1290, "epoch": 1 }, { "type": "loss", "content": 0.08485446870326996, "timestamp": "2025-09-10 02:27:23.774114", "step": 1291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.803060", "step": 1291, "epoch": 1 }, { "type": "loss", "content": 0.07073134928941727, "timestamp": "2025-09-10 02:27:23.826640", "step": 1292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.855312", "step": 1292, "epoch": 1 }, { "type": "loss", "content": 0.02002301998436451, "timestamp": "2025-09-10 02:27:23.857111", "step": 1293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:23.885696", "step": 1293, "epoch": 1 }, { "type": "loss", "content": 0.06766389310359955, "timestamp": "2025-09-10 02:27:23.887439", "step": 1294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.916028", "step": 1294, "epoch": 1 }, { "type": "loss", "content": 0.03803431987762451, "timestamp": "2025-09-10 02:27:23.917881", "step": 1295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:23.946494", "step": 1295, "epoch": 1 }, { "type": "loss", "content": 0.03328724578022957, "timestamp": "2025-09-10 02:27:23.969781", "step": 1296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:23.998826", "step": 1296, "epoch": 1 }, { "type": "loss", "content": 0.021175900474190712, "timestamp": "2025-09-10 02:27:24.000458", "step": 1297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:24.029385", "step": 1297, "epoch": 1 }, { "type": "loss", "content": 0.011340412311255932, "timestamp": "2025-09-10 02:27:24.031309", "step": 1298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.061093", "step": 1298, "epoch": 1 }, { "type": "loss", "content": 0.027038246393203735, "timestamp": "2025-09-10 02:27:24.062963", "step": 1299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:24.091782", "step": 1299, "epoch": 1 }, { "type": "loss", "content": 0.023592408746480942, "timestamp": "2025-09-10 02:27:24.114992", "step": 1300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.144301", "step": 1300, "epoch": 1 }, { "type": "loss", "content": 0.024524888023734093, "timestamp": "2025-09-10 02:27:24.145908", "step": 1301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.174488", "step": 1301, "epoch": 1 }, { "type": "loss", "content": 0.03530529886484146, "timestamp": "2025-09-10 02:27:24.176350", "step": 1302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.205257", "step": 1302, "epoch": 1 }, { "type": "loss", "content": 0.013897881843149662, "timestamp": "2025-09-10 02:27:24.206877", "step": 1303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:24.235354", "step": 1303, "epoch": 1 }, { "type": "loss", "content": 0.011112040840089321, "timestamp": "2025-09-10 02:27:24.258498", "step": 1304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.287332", "step": 1304, "epoch": 1 }, { "type": "loss", "content": 0.015398895367980003, "timestamp": "2025-09-10 02:27:24.289133", "step": 1305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.318088", "step": 1305, "epoch": 1 }, { "type": "loss", "content": 0.0026412815786898136, "timestamp": "2025-09-10 02:27:24.319913", "step": 1306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.348736", "step": 1306, "epoch": 1 }, { "type": "loss", "content": 0.02359415777027607, "timestamp": "2025-09-10 02:27:24.350653", "step": 1307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:24.379425", "step": 1307, "epoch": 1 }, { "type": "loss", "content": 0.023567870259284973, "timestamp": "2025-09-10 02:27:24.402747", "step": 1308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:24.431650", "step": 1308, "epoch": 1 }, { "type": "loss", "content": 0.05291704088449478, "timestamp": "2025-09-10 02:27:24.433423", "step": 1309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.461934", "step": 1309, "epoch": 1 }, { "type": "loss", "content": 0.05972858890891075, "timestamp": "2025-09-10 02:27:24.463571", "step": 1310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.492130", "step": 1310, "epoch": 1 }, { "type": "loss", "content": 0.019543515518307686, "timestamp": "2025-09-10 02:27:24.493856", "step": 1311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:24.522812", "step": 1311, "epoch": 1 }, { "type": "loss", "content": 0.012841133400797844, "timestamp": "2025-09-10 02:27:24.546268", "step": 1312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:24.574965", "step": 1312, "epoch": 1 }, { "type": "loss", "content": 0.012684354558587074, "timestamp": "2025-09-10 02:27:24.576889", "step": 1313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.605507", "step": 1313, "epoch": 1 }, { "type": "loss", "content": 0.06203369051218033, "timestamp": "2025-09-10 02:27:24.607346", "step": 1314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.636018", "step": 1314, "epoch": 1 }, { "type": "loss", "content": 0.03298981487751007, "timestamp": "2025-09-10 02:27:24.637697", "step": 1315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:24.665789", "step": 1315, "epoch": 1 }, { "type": "loss", "content": 0.07147373259067535, "timestamp": "2025-09-10 02:27:24.689182", "step": 1316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.718102", "step": 1316, "epoch": 1 }, { "type": "loss", "content": 0.0012829465558752418, "timestamp": "2025-09-10 02:27:24.720033", "step": 1317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.748278", "step": 1317, "epoch": 1 }, { "type": "loss", "content": 0.06342272460460663, "timestamp": "2025-09-10 02:27:24.750024", "step": 1318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:24.778675", "step": 1318, "epoch": 1 }, { "type": "loss", "content": 0.001892492757178843, "timestamp": "2025-09-10 02:27:24.780408", "step": 1319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.809008", "step": 1319, "epoch": 1 }, { "type": "loss", "content": 0.07816839218139648, "timestamp": "2025-09-10 02:27:24.832332", "step": 1320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.861108", "step": 1320, "epoch": 1 }, { "type": "loss", "content": 0.0809142217040062, "timestamp": "2025-09-10 02:27:24.862562", "step": 1321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.890969", "step": 1321, "epoch": 1 }, { "type": "loss", "content": 0.02756485342979431, "timestamp": "2025-09-10 02:27:24.892824", "step": 1322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:24.921260", "step": 1322, "epoch": 1 }, { "type": "loss", "content": 0.05552193894982338, "timestamp": "2025-09-10 02:27:24.922862", "step": 1323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:24.952011", "step": 1323, "epoch": 1 }, { "type": "loss", "content": 0.05214642733335495, "timestamp": "2025-09-10 02:27:24.975203", "step": 1324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.004458", "step": 1324, "epoch": 1 }, { "type": "loss", "content": 0.017305118963122368, "timestamp": "2025-09-10 02:27:25.006338", "step": 1325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.035966", "step": 1325, "epoch": 1 }, { "type": "loss", "content": 0.037766702473163605, "timestamp": "2025-09-10 02:27:25.037621", "step": 1326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.066325", "step": 1326, "epoch": 1 }, { "type": "loss", "content": 0.03526047244668007, "timestamp": "2025-09-10 02:27:25.068231", "step": 1327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:25.098267", "step": 1327, "epoch": 1 }, { "type": "loss", "content": 0.03304123878479004, "timestamp": "2025-09-10 02:27:25.121609", "step": 1328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.151203", "step": 1328, "epoch": 1 }, { "type": "loss", "content": 0.036667678505182266, "timestamp": "2025-09-10 02:27:25.152843", "step": 1329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.181525", "step": 1329, "epoch": 1 }, { "type": "loss", "content": 0.028064250946044922, "timestamp": "2025-09-10 02:27:25.183440", "step": 1330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.211924", "step": 1330, "epoch": 1 }, { "type": "loss", "content": 0.05444202199578285, "timestamp": "2025-09-10 02:27:25.213490", "step": 1331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.242331", "step": 1331, "epoch": 1 }, { "type": "loss", "content": 0.04092909023165703, "timestamp": "2025-09-10 02:27:25.265266", "step": 1332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.295180", "step": 1332, "epoch": 1 }, { "type": "loss", "content": 0.013612611219286919, "timestamp": "2025-09-10 02:27:25.297057", "step": 1333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:25.327124", "step": 1333, "epoch": 1 }, { "type": "loss", "content": 0.034610893577337265, "timestamp": "2025-09-10 02:27:25.328787", "step": 1334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.357837", "step": 1334, "epoch": 1 }, { "type": "loss", "content": 0.054943304508924484, "timestamp": "2025-09-10 02:27:25.359688", "step": 1335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.389019", "step": 1335, "epoch": 1 }, { "type": "loss", "content": 0.029924849048256874, "timestamp": "2025-09-10 02:27:25.412329", "step": 1336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.441649", "step": 1336, "epoch": 1 }, { "type": "loss", "content": 0.04340161755681038, "timestamp": "2025-09-10 02:27:25.443305", "step": 1337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:25.471960", "step": 1337, "epoch": 1 }, { "type": "loss", "content": 0.060874827206134796, "timestamp": "2025-09-10 02:27:25.474073", "step": 1338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.502774", "step": 1338, "epoch": 1 }, { "type": "loss", "content": 0.03564821928739548, "timestamp": "2025-09-10 02:27:25.504949", "step": 1339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:25.534082", "step": 1339, "epoch": 1 }, { "type": "loss", "content": 0.06171872466802597, "timestamp": "2025-09-10 02:27:25.557291", "step": 1340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.586922", "step": 1340, "epoch": 1 }, { "type": "loss", "content": 0.04816806688904762, "timestamp": "2025-09-10 02:27:25.589893", "step": 1341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.618100", "step": 1341, "epoch": 1 }, { "type": "loss", "content": 0.02508343569934368, "timestamp": "2025-09-10 02:27:25.619879", "step": 1342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.648973", "step": 1342, "epoch": 1 }, { "type": "loss", "content": 0.03181108459830284, "timestamp": "2025-09-10 02:27:25.650392", "step": 1343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:25.679385", "step": 1343, "epoch": 1 }, { "type": "loss", "content": 0.03315838426351547, "timestamp": "2025-09-10 02:27:25.702800", "step": 1344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.731711", "step": 1344, "epoch": 1 }, { "type": "loss", "content": 0.014892424456775188, "timestamp": "2025-09-10 02:27:25.733270", "step": 1345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.761950", "step": 1345, "epoch": 1 }, { "type": "loss", "content": 0.05184555426239967, "timestamp": "2025-09-10 02:27:25.763554", "step": 1346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.792001", "step": 1346, "epoch": 1 }, { "type": "loss", "content": 0.029433416202664375, "timestamp": "2025-09-10 02:27:25.793680", "step": 1347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.822146", "step": 1347, "epoch": 1 }, { "type": "loss", "content": 0.038075923919677734, "timestamp": "2025-09-10 02:27:25.845383", "step": 1348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.874069", "step": 1348, "epoch": 1 }, { "type": "loss", "content": 0.024071261286735535, "timestamp": "2025-09-10 02:27:25.875796", "step": 1349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:25.904319", "step": 1349, "epoch": 1 }, { "type": "loss", "content": 0.011127389967441559, "timestamp": "2025-09-10 02:27:25.906209", "step": 1350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.934896", "step": 1350, "epoch": 1 }, { "type": "loss", "content": 0.04546486213803291, "timestamp": "2025-09-10 02:27:25.936795", "step": 1351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:25.964997", "step": 1351, "epoch": 1 }, { "type": "loss", "content": 0.031817514449357986, "timestamp": "2025-09-10 02:27:25.988359", "step": 1352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:26.017349", "step": 1352, "epoch": 1 }, { "type": "loss", "content": 0.04650825262069702, "timestamp": "2025-09-10 02:27:26.019266", "step": 1353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.048007", "step": 1353, "epoch": 1 }, { "type": "loss", "content": 0.061909276992082596, "timestamp": "2025-09-10 02:27:26.049835", "step": 1354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.078465", "step": 1354, "epoch": 1 }, { "type": "loss", "content": 0.0383191742002964, "timestamp": "2025-09-10 02:27:26.080320", "step": 1355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.109443", "step": 1355, "epoch": 1 }, { "type": "loss", "content": 0.03044038824737072, "timestamp": "2025-09-10 02:27:26.132773", "step": 1356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:26.161530", "step": 1356, "epoch": 1 }, { "type": "loss", "content": 0.03768446296453476, "timestamp": "2025-09-10 02:27:26.163376", "step": 1357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.191854", "step": 1357, "epoch": 1 }, { "type": "loss", "content": 0.059272684156894684, "timestamp": "2025-09-10 02:27:26.193841", "step": 1358, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.222364", "step": 1358, "epoch": 1 }, { "type": "loss", "content": 0.03366556391119957, "timestamp": "2025-09-10 02:27:26.224210", "step": 1359, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:26.253159", "step": 1359, "epoch": 1 }, { "type": "loss", "content": 0.016975143924355507, "timestamp": "2025-09-10 02:27:26.276192", "step": 1360, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.305011", "step": 1360, "epoch": 1 }, { "type": "loss", "content": 0.043722160160541534, "timestamp": "2025-09-10 02:27:26.306625", "step": 1361, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:26.335283", "step": 1361, "epoch": 1 }, { "type": "loss", "content": 0.04664004221558571, "timestamp": "2025-09-10 02:27:26.337083", "step": 1362, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:26.365836", "step": 1362, "epoch": 1 }, { "type": "loss", "content": 0.04614395648241043, "timestamp": "2025-09-10 02:27:26.367448", "step": 1363, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:26.396613", "step": 1363, "epoch": 1 }, { "type": "loss", "content": 0.019763609394431114, "timestamp": "2025-09-10 02:27:26.420097", "step": 1364, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.449219", "step": 1364, "epoch": 1 }, { "type": "loss", "content": 0.03162803500890732, "timestamp": "2025-09-10 02:27:26.451064", "step": 1365, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.480075", "step": 1365, "epoch": 1 }, { "type": "loss", "content": 0.030755044892430305, "timestamp": "2025-09-10 02:27:26.481471", "step": 1366, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:26.510063", "step": 1366, "epoch": 1 }, { "type": "loss", "content": 0.03401505574584007, "timestamp": "2025-09-10 02:27:26.511882", "step": 1367, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:26.541269", "step": 1367, "epoch": 1 }, { "type": "loss", "content": 0.041028063744306564, "timestamp": "2025-09-10 02:27:26.564400", "step": 1368, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:27:28.441898", "step": 1368, "epoch": 1 }, { "type": "pplx", "content": 2434394.08545723, "timestamp": "2025-09-10 02:27:28.443674", "step": 1368, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.470905", "step": 1368, "epoch": 1 }, { "type": "loss", "content": 0.05904477462172508, "timestamp": "2025-09-10 02:27:28.472700", "step": 1369, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.501163", "step": 1369, "epoch": 1 }, { "type": "loss", "content": 0.04174638167023659, "timestamp": "2025-09-10 02:27:28.502976", "step": 1370, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:28.531659", "step": 1370, "epoch": 1 }, { "type": "loss", "content": 0.022512640804052353, "timestamp": "2025-09-10 02:27:28.533441", "step": 1371, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.565795", "step": 1371, "epoch": 1 }, { "type": "loss", "content": 0.045179132372140884, "timestamp": "2025-09-10 02:27:28.589340", "step": 1372, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.618289", "step": 1372, "epoch": 1 }, { "type": "loss", "content": 0.048660069704055786, "timestamp": "2025-09-10 02:27:28.620142", "step": 1373, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.648768", "step": 1373, "epoch": 1 }, { "type": "loss", "content": 0.03543194383382797, "timestamp": "2025-09-10 02:27:28.650597", "step": 1374, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.679648", "step": 1374, "epoch": 1 }, { "type": "loss", "content": 0.02141200564801693, "timestamp": "2025-09-10 02:27:28.683184", "step": 1375, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.716161", "step": 1375, "epoch": 1 }, { "type": "loss", "content": 0.03578066825866699, "timestamp": "2025-09-10 02:27:28.739558", "step": 1376, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.768689", "step": 1376, "epoch": 1 }, { "type": "loss", "content": 0.04398437216877937, "timestamp": "2025-09-10 02:27:28.770902", "step": 1377, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.799421", "step": 1377, "epoch": 1 }, { "type": "loss", "content": 0.05702298879623413, "timestamp": "2025-09-10 02:27:28.801203", "step": 1378, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.841154", "step": 1378, "epoch": 1 }, { "type": "loss", "content": 0.039825230836868286, "timestamp": "2025-09-10 02:27:28.842965", "step": 1379, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:28.872672", "step": 1379, "epoch": 1 }, { "type": "loss", "content": 0.07406977564096451, "timestamp": "2025-09-10 02:27:28.895862", "step": 1380, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.924385", "step": 1380, "epoch": 1 }, { "type": "loss", "content": 0.05747872591018677, "timestamp": "2025-09-10 02:27:28.931011", "step": 1381, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.959586", "step": 1381, "epoch": 1 }, { "type": "loss", "content": 0.05309832841157913, "timestamp": "2025-09-10 02:27:28.961376", "step": 1382, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:28.990092", "step": 1382, "epoch": 1 }, { "type": "loss", "content": 0.03823574632406235, "timestamp": "2025-09-10 02:27:28.991800", "step": 1383, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.020402", "step": 1383, "epoch": 1 }, { "type": "loss", "content": 0.02102424018085003, "timestamp": "2025-09-10 02:27:29.043641", "step": 1384, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.072209", "step": 1384, "epoch": 1 }, { "type": "loss", "content": 0.01896662451326847, "timestamp": "2025-09-10 02:27:29.074094", "step": 1385, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:29.102653", "step": 1385, "epoch": 1 }, { "type": "loss", "content": 0.020455926656723022, "timestamp": "2025-09-10 02:27:29.104239", "step": 1386, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.133020", "step": 1386, "epoch": 1 }, { "type": "loss", "content": 0.012913455255329609, "timestamp": "2025-09-10 02:27:29.134826", "step": 1387, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.163800", "step": 1387, "epoch": 1 }, { "type": "loss", "content": 0.038935162127017975, "timestamp": "2025-09-10 02:27:29.187067", "step": 1388, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.215552", "step": 1388, "epoch": 1 }, { "type": "loss", "content": 0.026753125712275505, "timestamp": "2025-09-10 02:27:29.217490", "step": 1389, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.246765", "step": 1389, "epoch": 1 }, { "type": "loss", "content": 0.036826133728027344, "timestamp": "2025-09-10 02:27:29.248536", "step": 1390, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.277180", "step": 1390, "epoch": 1 }, { "type": "loss", "content": 0.03888767585158348, "timestamp": "2025-09-10 02:27:29.278959", "step": 1391, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.307722", "step": 1391, "epoch": 1 }, { "type": "loss", "content": 0.024767255410552025, "timestamp": "2025-09-10 02:27:29.330817", "step": 1392, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.359796", "step": 1392, "epoch": 1 }, { "type": "loss", "content": 0.029736343771219254, "timestamp": "2025-09-10 02:27:29.361497", "step": 1393, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.389866", "step": 1393, "epoch": 1 }, { "type": "loss", "content": 0.011788521893322468, "timestamp": "2025-09-10 02:27:29.391779", "step": 1394, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.420713", "step": 1394, "epoch": 1 }, { "type": "loss", "content": 0.01132082287222147, "timestamp": "2025-09-10 02:27:29.422430", "step": 1395, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:29.450980", "step": 1395, "epoch": 1 }, { "type": "loss", "content": 0.02078998275101185, "timestamp": "2025-09-10 02:27:29.474254", "step": 1396, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.502649", "step": 1396, "epoch": 1 }, { "type": "loss", "content": 0.008231586776673794, "timestamp": "2025-09-10 02:27:29.504515", "step": 1397, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.533131", "step": 1397, "epoch": 1 }, { "type": "loss", "content": 0.019323036074638367, "timestamp": "2025-09-10 02:27:29.534822", "step": 1398, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.563520", "step": 1398, "epoch": 1 }, { "type": "loss", "content": 0.029249152168631554, "timestamp": "2025-09-10 02:27:29.565256", "step": 1399, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.594018", "step": 1399, "epoch": 1 }, { "type": "loss", "content": 0.009170596487820148, "timestamp": "2025-09-10 02:27:29.617294", "step": 1400, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.646067", "step": 1400, "epoch": 1 }, { "type": "loss", "content": 0.029752206057310104, "timestamp": "2025-09-10 02:27:29.647661", "step": 1401, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:29.676226", "step": 1401, "epoch": 1 }, { "type": "loss", "content": 0.0362543947994709, "timestamp": "2025-09-10 02:27:29.678085", "step": 1402, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.706690", "step": 1402, "epoch": 1 }, { "type": "loss", "content": 0.025247853249311447, "timestamp": "2025-09-10 02:27:29.708628", "step": 1403, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.737140", "step": 1403, "epoch": 1 }, { "type": "loss", "content": 0.027348194271326065, "timestamp": "2025-09-10 02:27:29.760403", "step": 1404, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.788925", "step": 1404, "epoch": 1 }, { "type": "loss", "content": 0.06653179228305817, "timestamp": "2025-09-10 02:27:29.790587", "step": 1405, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:29.819154", "step": 1405, "epoch": 1 }, { "type": "loss", "content": 0.016154978424310684, "timestamp": "2025-09-10 02:27:29.820770", "step": 1406, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.849061", "step": 1406, "epoch": 1 }, { "type": "loss", "content": 0.05642243102192879, "timestamp": "2025-09-10 02:27:29.850292", "step": 1407, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.878976", "step": 1407, "epoch": 1 }, { "type": "loss", "content": 0.019452065229415894, "timestamp": "2025-09-10 02:27:29.902168", "step": 1408, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:29.930913", "step": 1408, "epoch": 1 }, { "type": "loss", "content": 0.07361283153295517, "timestamp": "2025-09-10 02:27:29.932456", "step": 1409, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.961652", "step": 1409, "epoch": 1 }, { "type": "loss", "content": 0.00370202399790287, "timestamp": "2025-09-10 02:27:29.963206", "step": 1410, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:29.992063", "step": 1410, "epoch": 1 }, { "type": "loss", "content": 0.06461332738399506, "timestamp": "2025-09-10 02:27:29.993564", "step": 1411, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.022127", "step": 1411, "epoch": 1 }, { "type": "loss", "content": 0.03506890684366226, "timestamp": "2025-09-10 02:27:30.045216", "step": 1412, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:30.075040", "step": 1412, "epoch": 1 }, { "type": "loss", "content": 0.026593204587697983, "timestamp": "2025-09-10 02:27:30.076280", "step": 1413, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.104607", "step": 1413, "epoch": 1 }, { "type": "loss", "content": 0.05700697749853134, "timestamp": "2025-09-10 02:27:30.106264", "step": 1414, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.134859", "step": 1414, "epoch": 1 }, { "type": "loss", "content": 0.0367908738553524, "timestamp": "2025-09-10 02:27:30.136335", "step": 1415, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.164862", "step": 1415, "epoch": 1 }, { "type": "loss", "content": 0.02905452810227871, "timestamp": "2025-09-10 02:27:30.188075", "step": 1416, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.216592", "step": 1416, "epoch": 1 }, { "type": "loss", "content": 0.01928318664431572, "timestamp": "2025-09-10 02:27:30.218317", "step": 1417, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.246935", "step": 1417, "epoch": 1 }, { "type": "loss", "content": 0.012731648981571198, "timestamp": "2025-09-10 02:27:30.248564", "step": 1418, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.276602", "step": 1418, "epoch": 1 }, { "type": "loss", "content": 0.018270840868353844, "timestamp": "2025-09-10 02:27:30.278265", "step": 1419, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.306910", "step": 1419, "epoch": 1 }, { "type": "loss", "content": 0.009613311849534512, "timestamp": "2025-09-10 02:27:30.329917", "step": 1420, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.358706", "step": 1420, "epoch": 1 }, { "type": "loss", "content": 0.020077558234333992, "timestamp": "2025-09-10 02:27:30.360179", "step": 1421, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.388704", "step": 1421, "epoch": 1 }, { "type": "loss", "content": 0.023439887911081314, "timestamp": "2025-09-10 02:27:30.390178", "step": 1422, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:30.418772", "step": 1422, "epoch": 1 }, { "type": "loss", "content": 0.059081826359033585, "timestamp": "2025-09-10 02:27:30.420397", "step": 1423, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.449116", "step": 1423, "epoch": 1 }, { "type": "loss", "content": 0.02458806149661541, "timestamp": "2025-09-10 02:27:30.472364", "step": 1424, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.501203", "step": 1424, "epoch": 1 }, { "type": "loss", "content": 0.005730960983783007, "timestamp": "2025-09-10 02:27:30.502852", "step": 1425, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.531681", "step": 1425, "epoch": 1 }, { "type": "loss", "content": 0.06985829770565033, "timestamp": "2025-09-10 02:27:30.533405", "step": 1426, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.562738", "step": 1426, "epoch": 1 }, { "type": "loss", "content": 0.061879415065050125, "timestamp": "2025-09-10 02:27:30.564218", "step": 1427, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.593331", "step": 1427, "epoch": 1 }, { "type": "loss", "content": 0.03619138523936272, "timestamp": "2025-09-10 02:27:30.616559", "step": 1428, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.645213", "step": 1428, "epoch": 1 }, { "type": "loss", "content": 0.07978753000497818, "timestamp": "2025-09-10 02:27:30.646679", "step": 1429, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:30.676025", "step": 1429, "epoch": 1 }, { "type": "loss", "content": 0.06555866450071335, "timestamp": "2025-09-10 02:27:30.677684", "step": 1430, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.706434", "step": 1430, "epoch": 1 }, { "type": "loss", "content": 0.004598245490342379, "timestamp": "2025-09-10 02:27:30.708157", "step": 1431, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.736985", "step": 1431, "epoch": 1 }, { "type": "loss", "content": 0.00360993854701519, "timestamp": "2025-09-10 02:27:30.760311", "step": 1432, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.788480", "step": 1432, "epoch": 1 }, { "type": "loss", "content": 0.06020939350128174, "timestamp": "2025-09-10 02:27:30.790148", "step": 1433, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.818669", "step": 1433, "epoch": 1 }, { "type": "loss", "content": 0.04477548226714134, "timestamp": "2025-09-10 02:27:30.820102", "step": 1434, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.859687", "step": 1434, "epoch": 1 }, { "type": "loss", "content": 0.025800930336117744, "timestamp": "2025-09-10 02:27:30.862378", "step": 1435, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.892915", "step": 1435, "epoch": 1 }, { "type": "loss", "content": 0.053567446768283844, "timestamp": "2025-09-10 02:27:30.915782", "step": 1436, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.945167", "step": 1436, "epoch": 1 }, { "type": "loss", "content": 0.013317200355231762, "timestamp": "2025-09-10 02:27:30.946822", "step": 1437, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:30.983818", "step": 1437, "epoch": 1 }, { "type": "loss", "content": 0.0013515051687136292, "timestamp": "2025-09-10 02:27:30.985438", "step": 1438, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.034200", "step": 1438, "epoch": 1 }, { "type": "loss", "content": 0.01699732430279255, "timestamp": "2025-09-10 02:27:31.035638", "step": 1439, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.064112", "step": 1439, "epoch": 1 }, { "type": "loss", "content": 0.03770739212632179, "timestamp": "2025-09-10 02:27:31.087059", "step": 1440, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:31.115884", "step": 1440, "epoch": 1 }, { "type": "loss", "content": 0.009430590085685253, "timestamp": "2025-09-10 02:27:31.117590", "step": 1441, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.146719", "step": 1441, "epoch": 1 }, { "type": "loss", "content": 0.05910153314471245, "timestamp": "2025-09-10 02:27:31.148300", "step": 1442, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.176814", "step": 1442, "epoch": 1 }, { "type": "loss", "content": 0.05851801484823227, "timestamp": "2025-09-10 02:27:31.178479", "step": 1443, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.207119", "step": 1443, "epoch": 1 }, { "type": "loss", "content": 0.0213641170412302, "timestamp": "2025-09-10 02:27:31.230390", "step": 1444, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.259367", "step": 1444, "epoch": 1 }, { "type": "loss", "content": 0.09502411633729935, "timestamp": "2025-09-10 02:27:31.261116", "step": 1445, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:31.289959", "step": 1445, "epoch": 1 }, { "type": "loss", "content": 0.015186154283583164, "timestamp": "2025-09-10 02:27:31.291995", "step": 1446, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:31.320928", "step": 1446, "epoch": 1 }, { "type": "loss", "content": 0.028979049995541573, "timestamp": "2025-09-10 02:27:31.322661", "step": 1447, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.351078", "step": 1447, "epoch": 1 }, { "type": "loss", "content": 0.04702762886881828, "timestamp": "2025-09-10 02:27:31.374147", "step": 1448, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.403523", "step": 1448, "epoch": 1 }, { "type": "loss", "content": 0.05766434594988823, "timestamp": "2025-09-10 02:27:31.405068", "step": 1449, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.433210", "step": 1449, "epoch": 1 }, { "type": "loss", "content": 0.019389281049370766, "timestamp": "2025-09-10 02:27:31.434897", "step": 1450, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.463616", "step": 1450, "epoch": 1 }, { "type": "loss", "content": 0.02509896643459797, "timestamp": "2025-09-10 02:27:31.465021", "step": 1451, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.493114", "step": 1451, "epoch": 1 }, { "type": "loss", "content": 0.003370345802977681, "timestamp": "2025-09-10 02:27:31.516332", "step": 1452, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.545122", "step": 1452, "epoch": 1 }, { "type": "loss", "content": 0.007237529847770929, "timestamp": "2025-09-10 02:27:31.546968", "step": 1453, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.576560", "step": 1453, "epoch": 1 }, { "type": "loss", "content": 0.02067815326154232, "timestamp": "2025-09-10 02:27:31.578129", "step": 1454, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.606490", "step": 1454, "epoch": 1 }, { "type": "loss", "content": 0.025009362027049065, "timestamp": "2025-09-10 02:27:31.608123", "step": 1455, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:31.637240", "step": 1455, "epoch": 1 }, { "type": "loss", "content": 0.030619319528341293, "timestamp": "2025-09-10 02:27:31.660033", "step": 1456, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.688593", "step": 1456, "epoch": 1 }, { "type": "loss", "content": 0.043088074773550034, "timestamp": "2025-09-10 02:27:31.689980", "step": 1457, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.718964", "step": 1457, "epoch": 1 }, { "type": "loss", "content": 0.040807925164699554, "timestamp": "2025-09-10 02:27:31.720392", "step": 1458, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.749288", "step": 1458, "epoch": 1 }, { "type": "loss", "content": 0.03335711732506752, "timestamp": "2025-09-10 02:27:31.750862", "step": 1459, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.779152", "step": 1459, "epoch": 1 }, { "type": "loss", "content": 0.026910170912742615, "timestamp": "2025-09-10 02:27:31.802243", "step": 1460, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:31.830794", "step": 1460, "epoch": 1 }, { "type": "loss", "content": 0.016020607203245163, "timestamp": "2025-09-10 02:27:31.832565", "step": 1461, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.860702", "step": 1461, "epoch": 1 }, { "type": "loss", "content": 0.05180508643388748, "timestamp": "2025-09-10 02:27:31.862204", "step": 1462, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.890408", "step": 1462, "epoch": 1 }, { "type": "loss", "content": 0.02024121955037117, "timestamp": "2025-09-10 02:27:31.892053", "step": 1463, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.920488", "step": 1463, "epoch": 1 }, { "type": "loss", "content": 0.011840170249342918, "timestamp": "2025-09-10 02:27:31.943615", "step": 1464, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:31.971985", "step": 1464, "epoch": 1 }, { "type": "loss", "content": 0.012870186008512974, "timestamp": "2025-09-10 02:27:31.973445", "step": 1465, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:32.002753", "step": 1465, "epoch": 1 }, { "type": "loss", "content": 0.07110538333654404, "timestamp": "2025-09-10 02:27:32.004078", "step": 1466, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.032171", "step": 1466, "epoch": 1 }, { "type": "loss", "content": 0.032635726034641266, "timestamp": "2025-09-10 02:27:32.034102", "step": 1467, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.062702", "step": 1467, "epoch": 1 }, { "type": "loss", "content": 0.014046980999410152, "timestamp": "2025-09-10 02:27:32.085805", "step": 1468, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.114594", "step": 1468, "epoch": 1 }, { "type": "loss", "content": 0.013907646760344505, "timestamp": "2025-09-10 02:27:32.116359", "step": 1469, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.145027", "step": 1469, "epoch": 1 }, { "type": "loss", "content": 0.021792763844132423, "timestamp": "2025-09-10 02:27:32.146898", "step": 1470, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.176262", "step": 1470, "epoch": 1 }, { "type": "loss", "content": 0.0276406891644001, "timestamp": "2025-09-10 02:27:32.177950", "step": 1471, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:32.206718", "step": 1471, "epoch": 1 }, { "type": "loss", "content": 0.0620223693549633, "timestamp": "2025-09-10 02:27:32.229693", "step": 1472, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.258221", "step": 1472, "epoch": 1 }, { "type": "loss", "content": 0.030954156070947647, "timestamp": "2025-09-10 02:27:32.259806", "step": 1473, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.288366", "step": 1473, "epoch": 1 }, { "type": "loss", "content": 0.029085157439112663, "timestamp": "2025-09-10 02:27:32.291308", "step": 1474, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.322048", "step": 1474, "epoch": 1 }, { "type": "loss", "content": 0.04033757373690605, "timestamp": "2025-09-10 02:27:32.323892", "step": 1475, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:32.352760", "step": 1475, "epoch": 1 }, { "type": "loss", "content": 0.010873624123632908, "timestamp": "2025-09-10 02:27:32.375762", "step": 1476, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.405040", "step": 1476, "epoch": 1 }, { "type": "loss", "content": 0.013691750355064869, "timestamp": "2025-09-10 02:27:32.406520", "step": 1477, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:32.435024", "step": 1477, "epoch": 1 }, { "type": "loss", "content": 0.03100155107676983, "timestamp": "2025-09-10 02:27:32.436679", "step": 1478, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.465034", "step": 1478, "epoch": 1 }, { "type": "loss", "content": 0.04212302714586258, "timestamp": "2025-09-10 02:27:32.466551", "step": 1479, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.495069", "step": 1479, "epoch": 1 }, { "type": "loss", "content": 0.030898677185177803, "timestamp": "2025-09-10 02:27:32.518256", "step": 1480, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.547040", "step": 1480, "epoch": 1 }, { "type": "loss", "content": 0.023815101012587547, "timestamp": "2025-09-10 02:27:32.548369", "step": 1481, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.576975", "step": 1481, "epoch": 1 }, { "type": "loss", "content": 0.014275996014475822, "timestamp": "2025-09-10 02:27:32.578699", "step": 1482, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.606997", "step": 1482, "epoch": 1 }, { "type": "loss", "content": 0.009228719398379326, "timestamp": "2025-09-10 02:27:32.608707", "step": 1483, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:32.636795", "step": 1483, "epoch": 1 }, { "type": "loss", "content": 0.03572138771414757, "timestamp": "2025-09-10 02:27:32.659766", "step": 1484, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.688203", "step": 1484, "epoch": 1 }, { "type": "loss", "content": 0.04589131101965904, "timestamp": "2025-09-10 02:27:32.689613", "step": 1485, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.717728", "step": 1485, "epoch": 1 }, { "type": "loss", "content": 0.02454020082950592, "timestamp": "2025-09-10 02:27:32.719125", "step": 1486, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.747188", "step": 1486, "epoch": 1 }, { "type": "loss", "content": 0.01638377271592617, "timestamp": "2025-09-10 02:27:32.748874", "step": 1487, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:32.777924", "step": 1487, "epoch": 1 }, { "type": "loss", "content": 0.019874457269906998, "timestamp": "2025-09-10 02:27:32.801074", "step": 1488, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.832662", "step": 1488, "epoch": 1 }, { "type": "loss", "content": 0.037941742688417435, "timestamp": "2025-09-10 02:27:32.834348", "step": 1489, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.865054", "step": 1489, "epoch": 1 }, { "type": "loss", "content": 0.04492131620645523, "timestamp": "2025-09-10 02:27:32.866852", "step": 1490, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.895175", "step": 1490, "epoch": 1 }, { "type": "loss", "content": 0.07406710088253021, "timestamp": "2025-09-10 02:27:32.896565", "step": 1491, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:32.925001", "step": 1491, "epoch": 1 }, { "type": "loss", "content": 0.038812749087810516, "timestamp": "2025-09-10 02:27:32.948254", "step": 1492, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:32.976640", "step": 1492, "epoch": 1 }, { "type": "loss", "content": 0.023289749398827553, "timestamp": "2025-09-10 02:27:32.978144", "step": 1493, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:33.006669", "step": 1493, "epoch": 1 }, { "type": "loss", "content": 0.022751254960894585, "timestamp": "2025-09-10 02:27:33.008143", "step": 1494, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:33.036653", "step": 1494, "epoch": 1 }, { "type": "loss", "content": 0.02474578656256199, "timestamp": "2025-09-10 02:27:33.038144", "step": 1495, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:33.067191", "step": 1495, "epoch": 1 }, { "type": "loss", "content": 0.027360495179891586, "timestamp": "2025-09-10 02:27:33.090160", "step": 1496, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:33.119219", "step": 1496, "epoch": 1 }, { "type": "loss", "content": 0.028771663084626198, "timestamp": "2025-09-10 02:27:33.120486", "step": 1497, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:33.149008", "step": 1497, "epoch": 1 }, { "type": "loss", "content": 0.03129454329609871, "timestamp": "2025-09-10 02:27:33.150332", "step": 1498, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:33.178918", "step": 1498, "epoch": 1 }, { "type": "loss", "content": 0.032071299850940704, "timestamp": "2025-09-10 02:27:33.180420", "step": 1499, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:33.208968", "step": 1499, "epoch": 1 }, { "type": "loss", "content": 0.01595943421125412, "timestamp": "2025-09-10 02:27:33.231876", "step": 1500, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 1500", "timestamp": "2025-09-10 02:27:37.678201", "step": 1500, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.714896", "step": 1500, "epoch": 1 }, { "type": "loss", "content": 0.031216096132993698, "timestamp": "2025-09-10 02:27:37.716786", "step": 1501, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.746667", "step": 1501, "epoch": 1 }, { "type": "loss", "content": 0.03223239257931709, "timestamp": "2025-09-10 02:27:37.748366", "step": 1502, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.776981", "step": 1502, "epoch": 1 }, { "type": "loss", "content": 0.016054486855864525, "timestamp": "2025-09-10 02:27:37.778737", "step": 1503, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.807761", "step": 1503, "epoch": 1 }, { "type": "loss", "content": 0.04805648326873779, "timestamp": "2025-09-10 02:27:37.831105", "step": 1504, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.860972", "step": 1504, "epoch": 1 }, { "type": "loss", "content": 0.033813804388046265, "timestamp": "2025-09-10 02:27:37.862379", "step": 1505, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.891485", "step": 1505, "epoch": 1 }, { "type": "loss", "content": 0.02551659755408764, "timestamp": "2025-09-10 02:27:37.893157", "step": 1506, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.923206", "step": 1506, "epoch": 1 }, { "type": "loss", "content": 0.07560396194458008, "timestamp": "2025-09-10 02:27:37.924923", "step": 1507, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:37.953608", "step": 1507, "epoch": 1 }, { "type": "loss", "content": 0.016034072265028954, "timestamp": "2025-09-10 02:27:37.977075", "step": 1508, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.005684", "step": 1508, "epoch": 1 }, { "type": "loss", "content": 0.054234106093645096, "timestamp": "2025-09-10 02:27:38.007523", "step": 1509, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.037083", "step": 1509, "epoch": 1 }, { "type": "loss", "content": 0.055994562804698944, "timestamp": "2025-09-10 02:27:38.038696", "step": 1510, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:38.067343", "step": 1510, "epoch": 1 }, { "type": "loss", "content": 0.05683322623372078, "timestamp": "2025-09-10 02:27:38.069835", "step": 1511, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.098901", "step": 1511, "epoch": 1 }, { "type": "loss", "content": 0.044902119785547256, "timestamp": "2025-09-10 02:27:38.122281", "step": 1512, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.151339", "step": 1512, "epoch": 1 }, { "type": "loss", "content": 0.049421072006225586, "timestamp": "2025-09-10 02:27:38.152944", "step": 1513, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.181620", "step": 1513, "epoch": 1 }, { "type": "loss", "content": 0.02994488552212715, "timestamp": "2025-09-10 02:27:38.183424", "step": 1514, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.212678", "step": 1514, "epoch": 1 }, { "type": "loss", "content": 0.024709923192858696, "timestamp": "2025-09-10 02:27:38.214372", "step": 1515, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.243432", "step": 1515, "epoch": 1 }, { "type": "loss", "content": 0.006796296685934067, "timestamp": "2025-09-10 02:27:38.267015", "step": 1516, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.295646", "step": 1516, "epoch": 1 }, { "type": "loss", "content": 0.01112403254956007, "timestamp": "2025-09-10 02:27:38.297418", "step": 1517, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.326801", "step": 1517, "epoch": 1 }, { "type": "loss", "content": 0.018252085894346237, "timestamp": "2025-09-10 02:27:38.328406", "step": 1518, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.356887", "step": 1518, "epoch": 1 }, { "type": "loss", "content": 0.01778973825275898, "timestamp": "2025-09-10 02:27:38.358688", "step": 1519, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:38.387741", "step": 1519, "epoch": 1 }, { "type": "loss", "content": 0.03736981377005577, "timestamp": "2025-09-10 02:27:38.410900", "step": 1520, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:27:40.281408", "step": 1520, "epoch": 1 }, { "type": "pplx", "content": 2612814.9375976576, "timestamp": "2025-09-10 02:27:40.282825", "step": 1520, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.311317", "step": 1520, "epoch": 1 }, { "type": "loss", "content": 0.028275569900870323, "timestamp": "2025-09-10 02:27:40.313021", "step": 1521, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.343548", "step": 1521, "epoch": 1 }, { "type": "loss", "content": 0.020396174862980843, "timestamp": "2025-09-10 02:27:40.345347", "step": 1522, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:40.376004", "step": 1522, "epoch": 1 }, { "type": "loss", "content": 0.027503030374646187, "timestamp": "2025-09-10 02:27:40.377518", "step": 1523, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.407281", "step": 1523, "epoch": 1 }, { "type": "loss", "content": 0.03672170639038086, "timestamp": "2025-09-10 02:27:40.430620", "step": 1524, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.460546", "step": 1524, "epoch": 1 }, { "type": "loss", "content": 0.02398330345749855, "timestamp": "2025-09-10 02:27:40.462249", "step": 1525, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.491664", "step": 1525, "epoch": 1 }, { "type": "loss", "content": 0.07760308682918549, "timestamp": "2025-09-10 02:27:40.493790", "step": 1526, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.523312", "step": 1526, "epoch": 1 }, { "type": "loss", "content": 0.026426970958709717, "timestamp": "2025-09-10 02:27:40.525266", "step": 1527, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.554605", "step": 1527, "epoch": 1 }, { "type": "loss", "content": 0.08535528182983398, "timestamp": "2025-09-10 02:27:40.578519", "step": 1528, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:40.607561", "step": 1528, "epoch": 1 }, { "type": "loss", "content": 0.03914172574877739, "timestamp": "2025-09-10 02:27:40.609195", "step": 1529, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.638632", "step": 1529, "epoch": 1 }, { "type": "loss", "content": 0.009633233770728111, "timestamp": "2025-09-10 02:27:40.640508", "step": 1530, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.669737", "step": 1530, "epoch": 1 }, { "type": "loss", "content": 0.0483718141913414, "timestamp": "2025-09-10 02:27:40.671540", "step": 1531, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:40.700091", "step": 1531, "epoch": 1 }, { "type": "loss", "content": 0.049617115408182144, "timestamp": "2025-09-10 02:27:40.723592", "step": 1532, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.753058", "step": 1532, "epoch": 1 }, { "type": "loss", "content": 0.018541039898991585, "timestamp": "2025-09-10 02:27:40.754888", "step": 1533, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.784533", "step": 1533, "epoch": 1 }, { "type": "loss", "content": 0.03365081921219826, "timestamp": "2025-09-10 02:27:40.786130", "step": 1534, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.814712", "step": 1534, "epoch": 1 }, { "type": "loss", "content": 0.042272280901670456, "timestamp": "2025-09-10 02:27:40.816551", "step": 1535, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.845211", "step": 1535, "epoch": 1 }, { "type": "loss", "content": 0.03286171704530716, "timestamp": "2025-09-10 02:27:40.868559", "step": 1536, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.897786", "step": 1536, "epoch": 1 }, { "type": "loss", "content": 0.027605948969721794, "timestamp": "2025-09-10 02:27:40.899115", "step": 1537, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.927427", "step": 1537, "epoch": 1 }, { "type": "loss", "content": 0.01908447965979576, "timestamp": "2025-09-10 02:27:40.929138", "step": 1538, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:40.957733", "step": 1538, "epoch": 1 }, { "type": "loss", "content": 0.053460389375686646, "timestamp": "2025-09-10 02:27:40.958928", "step": 1539, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:40.988080", "step": 1539, "epoch": 1 }, { "type": "loss", "content": 0.04275515303015709, "timestamp": "2025-09-10 02:27:41.011207", "step": 1540, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:41.040564", "step": 1540, "epoch": 1 }, { "type": "loss", "content": 0.046914078295230865, "timestamp": "2025-09-10 02:27:41.042382", "step": 1541, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.071814", "step": 1541, "epoch": 1 }, { "type": "loss", "content": 0.02406330220401287, "timestamp": "2025-09-10 02:27:41.073356", "step": 1542, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.101928", "step": 1542, "epoch": 1 }, { "type": "loss", "content": 0.004797068890184164, "timestamp": "2025-09-10 02:27:41.103390", "step": 1543, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.132295", "step": 1543, "epoch": 1 }, { "type": "loss", "content": 0.032057516276836395, "timestamp": "2025-09-10 02:27:41.155503", "step": 1544, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.184572", "step": 1544, "epoch": 1 }, { "type": "loss", "content": 0.03724870830774307, "timestamp": "2025-09-10 02:27:41.186365", "step": 1545, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.215090", "step": 1545, "epoch": 1 }, { "type": "loss", "content": 0.03298107162117958, "timestamp": "2025-09-10 02:27:41.217082", "step": 1546, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.246546", "step": 1546, "epoch": 1 }, { "type": "loss", "content": 0.05260986462235451, "timestamp": "2025-09-10 02:27:41.248449", "step": 1547, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.277553", "step": 1547, "epoch": 1 }, { "type": "loss", "content": 0.01699446514248848, "timestamp": "2025-09-10 02:27:41.301026", "step": 1548, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.330790", "step": 1548, "epoch": 1 }, { "type": "loss", "content": 0.031801871955394745, "timestamp": "2025-09-10 02:27:41.332663", "step": 1549, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:41.361835", "step": 1549, "epoch": 1 }, { "type": "loss", "content": 0.04717520251870155, "timestamp": "2025-09-10 02:27:41.363819", "step": 1550, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.392884", "step": 1550, "epoch": 1 }, { "type": "loss", "content": 0.018794167786836624, "timestamp": "2025-09-10 02:27:41.394972", "step": 1551, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.424031", "step": 1551, "epoch": 1 }, { "type": "loss", "content": 0.029533835127949715, "timestamp": "2025-09-10 02:27:41.447423", "step": 1552, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.476409", "step": 1552, "epoch": 1 }, { "type": "loss", "content": 0.03371378034353256, "timestamp": "2025-09-10 02:27:41.478115", "step": 1553, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:41.509111", "step": 1553, "epoch": 1 }, { "type": "loss", "content": 0.02296869456768036, "timestamp": "2025-09-10 02:27:41.510958", "step": 1554, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.540351", "step": 1554, "epoch": 1 }, { "type": "loss", "content": 0.05213111639022827, "timestamp": "2025-09-10 02:27:41.542372", "step": 1555, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.571477", "step": 1555, "epoch": 1 }, { "type": "loss", "content": 0.05112554877996445, "timestamp": "2025-09-10 02:27:41.595042", "step": 1556, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:41.623777", "step": 1556, "epoch": 1 }, { "type": "loss", "content": 0.03932832553982735, "timestamp": "2025-09-10 02:27:41.625655", "step": 1557, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.654816", "step": 1557, "epoch": 1 }, { "type": "loss", "content": 0.029580960050225258, "timestamp": "2025-09-10 02:27:41.656523", "step": 1558, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.687598", "step": 1558, "epoch": 1 }, { "type": "loss", "content": 0.03373652696609497, "timestamp": "2025-09-10 02:27:41.688977", "step": 1559, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.717909", "step": 1559, "epoch": 1 }, { "type": "loss", "content": 0.022883696481585503, "timestamp": "2025-09-10 02:27:41.741038", "step": 1560, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:41.776334", "step": 1560, "epoch": 1 }, { "type": "loss", "content": 0.015995008870959282, "timestamp": "2025-09-10 02:27:41.778088", "step": 1561, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.820795", "step": 1561, "epoch": 1 }, { "type": "loss", "content": 0.020030509680509567, "timestamp": "2025-09-10 02:27:41.823969", "step": 1562, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.865922", "step": 1562, "epoch": 1 }, { "type": "loss", "content": 0.02815486304461956, "timestamp": "2025-09-10 02:27:41.867372", "step": 1563, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:27:41.896316", "step": 1563, "epoch": 1 }, { "type": "loss", "content": 0.04295634850859642, "timestamp": "2025-09-10 02:27:41.919726", "step": 1564, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:41.949200", "step": 1564, "epoch": 1 }, { "type": "loss", "content": 0.04763511195778847, "timestamp": "2025-09-10 02:27:41.951126", "step": 1565, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:41.981311", "step": 1565, "epoch": 1 }, { "type": "loss", "content": 0.030427174642682076, "timestamp": "2025-09-10 02:27:41.982910", "step": 1566, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.011390", "step": 1566, "epoch": 1 }, { "type": "loss", "content": 0.016630670055747032, "timestamp": "2025-09-10 02:27:42.013229", "step": 1567, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.046782", "step": 1567, "epoch": 1 }, { "type": "loss", "content": 0.0399869866669178, "timestamp": "2025-09-10 02:27:42.072887", "step": 1568, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.102813", "step": 1568, "epoch": 1 }, { "type": "loss", "content": 0.04899362847208977, "timestamp": "2025-09-10 02:27:42.104331", "step": 1569, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.134548", "step": 1569, "epoch": 1 }, { "type": "loss", "content": 0.018443822860717773, "timestamp": "2025-09-10 02:27:42.136639", "step": 1570, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.165712", "step": 1570, "epoch": 1 }, { "type": "loss", "content": 0.038207512348890305, "timestamp": "2025-09-10 02:27:42.167702", "step": 1571, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.196489", "step": 1571, "epoch": 1 }, { "type": "loss", "content": 0.0143889794126153, "timestamp": "2025-09-10 02:27:42.219652", "step": 1572, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:42.248928", "step": 1572, "epoch": 1 }, { "type": "loss", "content": 0.0734647661447525, "timestamp": "2025-09-10 02:27:42.250935", "step": 1573, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:42.283104", "step": 1573, "epoch": 1 }, { "type": "loss", "content": 0.022592630237340927, "timestamp": "2025-09-10 02:27:42.285116", "step": 1574, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.323233", "step": 1574, "epoch": 1 }, { "type": "loss", "content": 0.0480295792222023, "timestamp": "2025-09-10 02:27:42.324792", "step": 1575, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.352974", "step": 1575, "epoch": 1 }, { "type": "loss", "content": 0.0173494815826416, "timestamp": "2025-09-10 02:27:42.376119", "step": 1576, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.404432", "step": 1576, "epoch": 1 }, { "type": "loss", "content": 0.04874527081847191, "timestamp": "2025-09-10 02:27:42.405747", "step": 1577, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:42.434276", "step": 1577, "epoch": 1 }, { "type": "loss", "content": 0.027809513732790947, "timestamp": "2025-09-10 02:27:42.435700", "step": 1578, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:42.463979", "step": 1578, "epoch": 1 }, { "type": "loss", "content": 0.033332522958517075, "timestamp": "2025-09-10 02:27:42.465523", "step": 1579, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.494495", "step": 1579, "epoch": 1 }, { "type": "loss", "content": 0.050775084644556046, "timestamp": "2025-09-10 02:27:42.517496", "step": 1580, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.545903", "step": 1580, "epoch": 1 }, { "type": "loss", "content": 0.03698405623435974, "timestamp": "2025-09-10 02:27:42.547403", "step": 1581, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.576528", "step": 1581, "epoch": 1 }, { "type": "loss", "content": 0.05788445845246315, "timestamp": "2025-09-10 02:27:42.578052", "step": 1582, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.606079", "step": 1582, "epoch": 1 }, { "type": "loss", "content": 0.009278425946831703, "timestamp": "2025-09-10 02:27:42.607818", "step": 1583, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.636501", "step": 1583, "epoch": 1 }, { "type": "loss", "content": 0.05599082633852959, "timestamp": "2025-09-10 02:27:42.659836", "step": 1584, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.689019", "step": 1584, "epoch": 1 }, { "type": "loss", "content": 0.03505483642220497, "timestamp": "2025-09-10 02:27:42.690790", "step": 1585, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:42.719347", "step": 1585, "epoch": 1 }, { "type": "loss", "content": 0.048709262162446976, "timestamp": "2025-09-10 02:27:42.720747", "step": 1586, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.749335", "step": 1586, "epoch": 1 }, { "type": "loss", "content": 0.06378418952226639, "timestamp": "2025-09-10 02:27:42.750876", "step": 1587, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:42.779259", "step": 1587, "epoch": 1 }, { "type": "loss", "content": 0.02308001182973385, "timestamp": "2025-09-10 02:27:42.802379", "step": 1588, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.830568", "step": 1588, "epoch": 1 }, { "type": "loss", "content": 0.036102551966905594, "timestamp": "2025-09-10 02:27:42.832346", "step": 1589, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.861945", "step": 1589, "epoch": 1 }, { "type": "loss", "content": 0.03362468630075455, "timestamp": "2025-09-10 02:27:42.863472", "step": 1590, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.891559", "step": 1590, "epoch": 1 }, { "type": "loss", "content": 0.04461940377950668, "timestamp": "2025-09-10 02:27:42.892977", "step": 1591, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.921518", "step": 1591, "epoch": 1 }, { "type": "loss", "content": 0.04401617869734764, "timestamp": "2025-09-10 02:27:42.944417", "step": 1592, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:42.974190", "step": 1592, "epoch": 1 }, { "type": "loss", "content": 0.03486516699194908, "timestamp": "2025-09-10 02:27:42.975532", "step": 1593, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.005314", "step": 1593, "epoch": 1 }, { "type": "loss", "content": 0.030225077643990517, "timestamp": "2025-09-10 02:27:43.007081", "step": 1594, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:43.036700", "step": 1594, "epoch": 1 }, { "type": "loss", "content": 0.04530999809503555, "timestamp": "2025-09-10 02:27:43.038232", "step": 1595, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.067360", "step": 1595, "epoch": 1 }, { "type": "loss", "content": 0.03315466269850731, "timestamp": "2025-09-10 02:27:43.090271", "step": 1596, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.118612", "step": 1596, "epoch": 1 }, { "type": "loss", "content": 0.03433266654610634, "timestamp": "2025-09-10 02:27:43.120222", "step": 1597, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.148786", "step": 1597, "epoch": 1 }, { "type": "loss", "content": 0.017902569845318794, "timestamp": "2025-09-10 02:27:43.150302", "step": 1598, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:43.178863", "step": 1598, "epoch": 1 }, { "type": "loss", "content": 0.032323576509952545, "timestamp": "2025-09-10 02:27:43.180600", "step": 1599, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.208792", "step": 1599, "epoch": 1 }, { "type": "loss", "content": 0.01567261666059494, "timestamp": "2025-09-10 02:27:43.231977", "step": 1600, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:43.261132", "step": 1600, "epoch": 1 }, { "type": "loss", "content": 0.03300542011857033, "timestamp": "2025-09-10 02:27:43.262600", "step": 1601, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.290807", "step": 1601, "epoch": 1 }, { "type": "loss", "content": 0.03128361329436302, "timestamp": "2025-09-10 02:27:43.292259", "step": 1602, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.320991", "step": 1602, "epoch": 1 }, { "type": "loss", "content": 0.021510543301701546, "timestamp": "2025-09-10 02:27:43.322612", "step": 1603, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.350895", "step": 1603, "epoch": 1 }, { "type": "loss", "content": 0.05365094542503357, "timestamp": "2025-09-10 02:27:43.374043", "step": 1604, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.403726", "step": 1604, "epoch": 1 }, { "type": "loss", "content": 0.03761393204331398, "timestamp": "2025-09-10 02:27:43.405308", "step": 1605, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.433961", "step": 1605, "epoch": 1 }, { "type": "loss", "content": 0.00948913861066103, "timestamp": "2025-09-10 02:27:43.435461", "step": 1606, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.463843", "step": 1606, "epoch": 1 }, { "type": "loss", "content": 0.047155749052762985, "timestamp": "2025-09-10 02:27:43.465336", "step": 1607, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.494372", "step": 1607, "epoch": 1 }, { "type": "loss", "content": 0.0055794003419578075, "timestamp": "2025-09-10 02:27:43.517506", "step": 1608, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.545822", "step": 1608, "epoch": 1 }, { "type": "loss", "content": 0.021954579278826714, "timestamp": "2025-09-10 02:27:43.547425", "step": 1609, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:43.576580", "step": 1609, "epoch": 1 }, { "type": "loss", "content": 0.02650618925690651, "timestamp": "2025-09-10 02:27:43.577987", "step": 1610, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.606257", "step": 1610, "epoch": 1 }, { "type": "loss", "content": 0.043255094438791275, "timestamp": "2025-09-10 02:27:43.608041", "step": 1611, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:43.636593", "step": 1611, "epoch": 1 }, { "type": "loss", "content": 0.03896671161055565, "timestamp": "2025-09-10 02:27:43.659738", "step": 1612, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.688092", "step": 1612, "epoch": 1 }, { "type": "loss", "content": 0.013624775223433971, "timestamp": "2025-09-10 02:27:43.689711", "step": 1613, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.718571", "step": 1613, "epoch": 1 }, { "type": "loss", "content": 0.039595868438482285, "timestamp": "2025-09-10 02:27:43.720447", "step": 1614, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.749713", "step": 1614, "epoch": 1 }, { "type": "loss", "content": 0.031632862985134125, "timestamp": "2025-09-10 02:27:43.751347", "step": 1615, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.779932", "step": 1615, "epoch": 1 }, { "type": "loss", "content": 0.03027370758354664, "timestamp": "2025-09-10 02:27:43.802963", "step": 1616, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.831392", "step": 1616, "epoch": 1 }, { "type": "loss", "content": 0.03935625031590462, "timestamp": "2025-09-10 02:27:43.833006", "step": 1617, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.861948", "step": 1617, "epoch": 1 }, { "type": "loss", "content": 0.0709146186709404, "timestamp": "2025-09-10 02:27:43.863473", "step": 1618, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.892693", "step": 1618, "epoch": 1 }, { "type": "loss", "content": 0.04960712417960167, "timestamp": "2025-09-10 02:27:43.894176", "step": 1619, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.923086", "step": 1619, "epoch": 1 }, { "type": "loss", "content": 0.022915851324796677, "timestamp": "2025-09-10 02:27:43.946113", "step": 1620, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:43.975367", "step": 1620, "epoch": 1 }, { "type": "loss", "content": 0.016043413430452347, "timestamp": "2025-09-10 02:27:43.978073", "step": 1621, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.010444", "step": 1621, "epoch": 1 }, { "type": "loss", "content": 0.017392989248037338, "timestamp": "2025-09-10 02:27:44.012073", "step": 1622, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:44.041207", "step": 1622, "epoch": 1 }, { "type": "loss", "content": 0.060260094702243805, "timestamp": "2025-09-10 02:27:44.042762", "step": 1623, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.071066", "step": 1623, "epoch": 1 }, { "type": "loss", "content": 0.058866389095783234, "timestamp": "2025-09-10 02:27:44.093964", "step": 1624, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.123024", "step": 1624, "epoch": 1 }, { "type": "loss", "content": 0.014653525315225124, "timestamp": "2025-09-10 02:27:44.124436", "step": 1625, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.152603", "step": 1625, "epoch": 1 }, { "type": "loss", "content": 0.03627878054976463, "timestamp": "2025-09-10 02:27:44.154244", "step": 1626, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.182972", "step": 1626, "epoch": 1 }, { "type": "loss", "content": 0.03967222198843956, "timestamp": "2025-09-10 02:27:44.185016", "step": 1627, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:44.213630", "step": 1627, "epoch": 1 }, { "type": "loss", "content": 0.048887427896261215, "timestamp": "2025-09-10 02:27:44.236990", "step": 1628, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.265542", "step": 1628, "epoch": 1 }, { "type": "loss", "content": 0.037526946514844894, "timestamp": "2025-09-10 02:27:44.267370", "step": 1629, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.296660", "step": 1629, "epoch": 1 }, { "type": "loss", "content": 0.00783440750092268, "timestamp": "2025-09-10 02:27:44.298230", "step": 1630, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.326724", "step": 1630, "epoch": 1 }, { "type": "loss", "content": 0.011957625858485699, "timestamp": "2025-09-10 02:27:44.328413", "step": 1631, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.357216", "step": 1631, "epoch": 1 }, { "type": "loss", "content": 0.03405969962477684, "timestamp": "2025-09-10 02:27:44.380626", "step": 1632, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.409404", "step": 1632, "epoch": 1 }, { "type": "loss", "content": 0.011136604472994804, "timestamp": "2025-09-10 02:27:44.411003", "step": 1633, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.439355", "step": 1633, "epoch": 1 }, { "type": "loss", "content": 0.036693062633275986, "timestamp": "2025-09-10 02:27:44.441105", "step": 1634, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:44.469826", "step": 1634, "epoch": 1 }, { "type": "loss", "content": 0.021266072988510132, "timestamp": "2025-09-10 02:27:44.471433", "step": 1635, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.500192", "step": 1635, "epoch": 1 }, { "type": "loss", "content": 0.01566631905734539, "timestamp": "2025-09-10 02:27:44.523117", "step": 1636, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.551965", "step": 1636, "epoch": 1 }, { "type": "loss", "content": 0.04040166735649109, "timestamp": "2025-09-10 02:27:44.553651", "step": 1637, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.582469", "step": 1637, "epoch": 1 }, { "type": "loss", "content": 0.0320776030421257, "timestamp": "2025-09-10 02:27:44.584161", "step": 1638, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.612507", "step": 1638, "epoch": 1 }, { "type": "loss", "content": 0.011080854572355747, "timestamp": "2025-09-10 02:27:44.614260", "step": 1639, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.642963", "step": 1639, "epoch": 1 }, { "type": "loss", "content": 0.019626542925834656, "timestamp": "2025-09-10 02:27:44.666041", "step": 1640, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.694693", "step": 1640, "epoch": 1 }, { "type": "loss", "content": 0.019304517656564713, "timestamp": "2025-09-10 02:27:44.696296", "step": 1641, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.725577", "step": 1641, "epoch": 1 }, { "type": "loss", "content": 0.024365395307540894, "timestamp": "2025-09-10 02:27:44.727086", "step": 1642, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.755270", "step": 1642, "epoch": 1 }, { "type": "loss", "content": 0.04958360642194748, "timestamp": "2025-09-10 02:27:44.757053", "step": 1643, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.785805", "step": 1643, "epoch": 1 }, { "type": "loss", "content": 0.037695057690143585, "timestamp": "2025-09-10 02:27:44.808840", "step": 1644, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.837497", "step": 1644, "epoch": 1 }, { "type": "loss", "content": 0.019487785175442696, "timestamp": "2025-09-10 02:27:44.838999", "step": 1645, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:44.866958", "step": 1645, "epoch": 1 }, { "type": "loss", "content": 0.017532160505652428, "timestamp": "2025-09-10 02:27:44.868474", "step": 1646, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.896893", "step": 1646, "epoch": 1 }, { "type": "loss", "content": 0.030309343710541725, "timestamp": "2025-09-10 02:27:44.898260", "step": 1647, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.926284", "step": 1647, "epoch": 1 }, { "type": "loss", "content": 0.05629602074623108, "timestamp": "2025-09-10 02:27:44.949480", "step": 1648, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:44.977975", "step": 1648, "epoch": 1 }, { "type": "loss", "content": 0.022461825981736183, "timestamp": "2025-09-10 02:27:44.979529", "step": 1649, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.008306", "step": 1649, "epoch": 1 }, { "type": "loss", "content": 0.02396000362932682, "timestamp": "2025-09-10 02:27:45.009678", "step": 1650, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.037720", "step": 1650, "epoch": 1 }, { "type": "loss", "content": 0.02895105816423893, "timestamp": "2025-09-10 02:27:45.039279", "step": 1651, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.067575", "step": 1651, "epoch": 1 }, { "type": "loss", "content": 0.01280116755515337, "timestamp": "2025-09-10 02:27:45.090333", "step": 1652, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.118525", "step": 1652, "epoch": 1 }, { "type": "loss", "content": 0.07098526507616043, "timestamp": "2025-09-10 02:27:45.119954", "step": 1653, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:45.148605", "step": 1653, "epoch": 1 }, { "type": "loss", "content": 0.016890021041035652, "timestamp": "2025-09-10 02:27:45.150250", "step": 1654, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.179562", "step": 1654, "epoch": 1 }, { "type": "loss", "content": 0.02893208898603916, "timestamp": "2025-09-10 02:27:45.181092", "step": 1655, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.209545", "step": 1655, "epoch": 1 }, { "type": "loss", "content": 0.04043635353446007, "timestamp": "2025-09-10 02:27:45.232799", "step": 1656, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.261819", "step": 1656, "epoch": 1 }, { "type": "loss", "content": 0.03076103888452053, "timestamp": "2025-09-10 02:27:45.263582", "step": 1657, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.292153", "step": 1657, "epoch": 1 }, { "type": "loss", "content": 0.01457404438406229, "timestamp": "2025-09-10 02:27:45.293799", "step": 1658, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:45.321999", "step": 1658, "epoch": 1 }, { "type": "loss", "content": 0.032336682081222534, "timestamp": "2025-09-10 02:27:45.323653", "step": 1659, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.351963", "step": 1659, "epoch": 1 }, { "type": "loss", "content": 0.04353252425789833, "timestamp": "2025-09-10 02:27:45.375309", "step": 1660, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:45.403839", "step": 1660, "epoch": 1 }, { "type": "loss", "content": 0.01743830367922783, "timestamp": "2025-09-10 02:27:45.405345", "step": 1661, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.434321", "step": 1661, "epoch": 1 }, { "type": "loss", "content": 0.06116855517029762, "timestamp": "2025-09-10 02:27:45.435856", "step": 1662, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.464112", "step": 1662, "epoch": 1 }, { "type": "loss", "content": 0.02149762213230133, "timestamp": "2025-09-10 02:27:45.465691", "step": 1663, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.493957", "step": 1663, "epoch": 1 }, { "type": "loss", "content": 0.023374240845441818, "timestamp": "2025-09-10 02:27:45.517054", "step": 1664, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.545705", "step": 1664, "epoch": 1 }, { "type": "loss", "content": 0.0031735931988805532, "timestamp": "2025-09-10 02:27:45.547084", "step": 1665, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:45.575619", "step": 1665, "epoch": 1 }, { "type": "loss", "content": 0.01768544688820839, "timestamp": "2025-09-10 02:27:45.577131", "step": 1666, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.605225", "step": 1666, "epoch": 1 }, { "type": "loss", "content": 0.02468477189540863, "timestamp": "2025-09-10 02:27:45.606744", "step": 1667, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:45.634987", "step": 1667, "epoch": 1 }, { "type": "loss", "content": 0.02794378623366356, "timestamp": "2025-09-10 02:27:45.659175", "step": 1668, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:45.688159", "step": 1668, "epoch": 1 }, { "type": "loss", "content": 0.004905772395431995, "timestamp": "2025-09-10 02:27:45.689583", "step": 1669, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.718566", "step": 1669, "epoch": 1 }, { "type": "loss", "content": 0.04774912819266319, "timestamp": "2025-09-10 02:27:45.720076", "step": 1670, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.749082", "step": 1670, "epoch": 1 }, { "type": "loss", "content": 0.005234159994870424, "timestamp": "2025-09-10 02:27:45.750935", "step": 1671, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:45.779850", "step": 1671, "epoch": 1 }, { "type": "loss", "content": 0.043323665857315063, "timestamp": "2025-09-10 02:27:45.803094", "step": 1672, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:27:47.722743", "step": 1672, "epoch": 1 }, { "type": "pplx", "content": 2764803.1894251006, "timestamp": "2025-09-10 02:27:47.724332", "step": 1672, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:47.752431", "step": 1672, "epoch": 1 }, { "type": "loss", "content": 0.04397788271307945, "timestamp": "2025-09-10 02:27:47.754068", "step": 1673, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:47.782317", "step": 1673, "epoch": 1 }, { "type": "loss", "content": 0.03467138856649399, "timestamp": "2025-09-10 02:27:47.783891", "step": 1674, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:47.814497", "step": 1674, "epoch": 1 }, { "type": "loss", "content": 0.009997214190661907, "timestamp": "2025-09-10 02:27:47.817057", "step": 1675, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:47.845456", "step": 1675, "epoch": 1 }, { "type": "loss", "content": 0.027228351682424545, "timestamp": "2025-09-10 02:27:47.868487", "step": 1676, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:47.897179", "step": 1676, "epoch": 1 }, { "type": "loss", "content": 0.020021233707666397, "timestamp": "2025-09-10 02:27:47.898665", "step": 1677, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:47.926601", "step": 1677, "epoch": 1 }, { "type": "loss", "content": 0.02924380823969841, "timestamp": "2025-09-10 02:27:47.928228", "step": 1678, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:47.956528", "step": 1678, "epoch": 1 }, { "type": "loss", "content": 0.030104586854577065, "timestamp": "2025-09-10 02:27:47.958021", "step": 1679, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:47.986109", "step": 1679, "epoch": 1 }, { "type": "loss", "content": 0.031224578619003296, "timestamp": "2025-09-10 02:27:48.009429", "step": 1680, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.038089", "step": 1680, "epoch": 1 }, { "type": "loss", "content": 0.022594813257455826, "timestamp": "2025-09-10 02:27:48.039667", "step": 1681, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:48.068067", "step": 1681, "epoch": 1 }, { "type": "loss", "content": 0.028602972626686096, "timestamp": "2025-09-10 02:27:48.069465", "step": 1682, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.098138", "step": 1682, "epoch": 1 }, { "type": "loss", "content": 0.01706654205918312, "timestamp": "2025-09-10 02:27:48.099601", "step": 1683, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.127855", "step": 1683, "epoch": 1 }, { "type": "loss", "content": 0.00787374284118414, "timestamp": "2025-09-10 02:27:48.150975", "step": 1684, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.179784", "step": 1684, "epoch": 1 }, { "type": "loss", "content": 0.029157137498259544, "timestamp": "2025-09-10 02:27:48.181217", "step": 1685, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.209312", "step": 1685, "epoch": 1 }, { "type": "loss", "content": 0.0034372094087302685, "timestamp": "2025-09-10 02:27:48.211067", "step": 1686, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.239376", "step": 1686, "epoch": 1 }, { "type": "loss", "content": 0.01937475986778736, "timestamp": "2025-09-10 02:27:48.240796", "step": 1687, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.268797", "step": 1687, "epoch": 1 }, { "type": "loss", "content": 0.041939932852983475, "timestamp": "2025-09-10 02:27:48.292002", "step": 1688, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:48.320677", "step": 1688, "epoch": 1 }, { "type": "loss", "content": 0.02188614383339882, "timestamp": "2025-09-10 02:27:48.323506", "step": 1689, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:48.355816", "step": 1689, "epoch": 1 }, { "type": "loss", "content": 0.027482228353619576, "timestamp": "2025-09-10 02:27:48.357157", "step": 1690, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.385605", "step": 1690, "epoch": 1 }, { "type": "loss", "content": 0.03252260759472847, "timestamp": "2025-09-10 02:27:48.387315", "step": 1691, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.415723", "step": 1691, "epoch": 1 }, { "type": "loss", "content": 0.06514390558004379, "timestamp": "2025-09-10 02:27:48.440111", "step": 1692, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:48.469992", "step": 1692, "epoch": 1 }, { "type": "loss", "content": 0.02514522336423397, "timestamp": "2025-09-10 02:27:48.471495", "step": 1693, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.500726", "step": 1693, "epoch": 1 }, { "type": "loss", "content": 0.005847656633704901, "timestamp": "2025-09-10 02:27:48.503319", "step": 1694, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.532798", "step": 1694, "epoch": 1 }, { "type": "loss", "content": 0.03883161023259163, "timestamp": "2025-09-10 02:27:48.534584", "step": 1695, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:48.563253", "step": 1695, "epoch": 1 }, { "type": "loss", "content": 0.007636170368641615, "timestamp": "2025-09-10 02:27:48.586464", "step": 1696, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:48.615800", "step": 1696, "epoch": 1 }, { "type": "loss", "content": 0.022910278290510178, "timestamp": "2025-09-10 02:27:48.617437", "step": 1697, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.646120", "step": 1697, "epoch": 1 }, { "type": "loss", "content": 0.030868876725435257, "timestamp": "2025-09-10 02:27:48.647825", "step": 1698, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.676971", "step": 1698, "epoch": 1 }, { "type": "loss", "content": 0.031839769333601, "timestamp": "2025-09-10 02:27:48.678378", "step": 1699, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.707246", "step": 1699, "epoch": 1 }, { "type": "loss", "content": 0.050692055374383926, "timestamp": "2025-09-10 02:27:48.730366", "step": 1700, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.759234", "step": 1700, "epoch": 1 }, { "type": "loss", "content": 0.04352886602282524, "timestamp": "2025-09-10 02:27:48.760993", "step": 1701, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.790031", "step": 1701, "epoch": 1 }, { "type": "loss", "content": 0.036653898656368256, "timestamp": "2025-09-10 02:27:48.791430", "step": 1702, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.819724", "step": 1702, "epoch": 1 }, { "type": "loss", "content": 0.06488614529371262, "timestamp": "2025-09-10 02:27:48.821457", "step": 1703, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.850112", "step": 1703, "epoch": 1 }, { "type": "loss", "content": 0.012786167673766613, "timestamp": "2025-09-10 02:27:48.873196", "step": 1704, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.901970", "step": 1704, "epoch": 1 }, { "type": "loss", "content": 0.043236348778009415, "timestamp": "2025-09-10 02:27:48.903382", "step": 1705, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.931792", "step": 1705, "epoch": 1 }, { "type": "loss", "content": 0.02485862746834755, "timestamp": "2025-09-10 02:27:48.933431", "step": 1706, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:48.962474", "step": 1706, "epoch": 1 }, { "type": "loss", "content": 0.05799518898129463, "timestamp": "2025-09-10 02:27:48.963975", "step": 1707, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:48.992520", "step": 1707, "epoch": 1 }, { "type": "loss", "content": 0.01807405613362789, "timestamp": "2025-09-10 02:27:49.015700", "step": 1708, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.044885", "step": 1708, "epoch": 1 }, { "type": "loss", "content": 0.008302552625536919, "timestamp": "2025-09-10 02:27:49.046323", "step": 1709, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:49.075157", "step": 1709, "epoch": 1 }, { "type": "loss", "content": 0.014733013696968555, "timestamp": "2025-09-10 02:27:49.076992", "step": 1710, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:49.105626", "step": 1710, "epoch": 1 }, { "type": "loss", "content": 0.06290986388921738, "timestamp": "2025-09-10 02:27:49.107098", "step": 1711, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:49.135687", "step": 1711, "epoch": 1 }, { "type": "loss", "content": 0.0030225724913179874, "timestamp": "2025-09-10 02:27:49.158848", "step": 1712, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:49.187977", "step": 1712, "epoch": 1 }, { "type": "loss", "content": 0.009493774734437466, "timestamp": "2025-09-10 02:27:49.189443", "step": 1713, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.218142", "step": 1713, "epoch": 1 }, { "type": "loss", "content": 0.0634767934679985, "timestamp": "2025-09-10 02:27:49.219855", "step": 1714, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.248400", "step": 1714, "epoch": 1 }, { "type": "loss", "content": 0.045059945434331894, "timestamp": "2025-09-10 02:27:49.250064", "step": 1715, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.278627", "step": 1715, "epoch": 1 }, { "type": "loss", "content": 0.009607000276446342, "timestamp": "2025-09-10 02:27:49.301556", "step": 1716, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:49.330777", "step": 1716, "epoch": 1 }, { "type": "loss", "content": 0.02127017080783844, "timestamp": "2025-09-10 02:27:49.332219", "step": 1717, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.360819", "step": 1717, "epoch": 1 }, { "type": "loss", "content": 0.05764401704072952, "timestamp": "2025-09-10 02:27:49.362490", "step": 1718, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.390787", "step": 1718, "epoch": 1 }, { "type": "loss", "content": 0.06865488737821579, "timestamp": "2025-09-10 02:27:49.392270", "step": 1719, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.421124", "step": 1719, "epoch": 1 }, { "type": "loss", "content": 0.05024939775466919, "timestamp": "2025-09-10 02:27:49.444316", "step": 1720, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.473616", "step": 1720, "epoch": 1 }, { "type": "loss", "content": 0.022084476426243782, "timestamp": "2025-09-10 02:27:49.475276", "step": 1721, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:49.504609", "step": 1721, "epoch": 1 }, { "type": "loss", "content": 0.038758330047130585, "timestamp": "2025-09-10 02:27:49.507113", "step": 1722, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.537841", "step": 1722, "epoch": 1 }, { "type": "loss", "content": 0.04677200689911842, "timestamp": "2025-09-10 02:27:49.539490", "step": 1723, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.568801", "step": 1723, "epoch": 1 }, { "type": "loss", "content": 0.01305408775806427, "timestamp": "2025-09-10 02:27:49.591856", "step": 1724, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.620897", "step": 1724, "epoch": 1 }, { "type": "loss", "content": 0.012123716995120049, "timestamp": "2025-09-10 02:27:49.623010", "step": 1725, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:49.652003", "step": 1725, "epoch": 1 }, { "type": "loss", "content": 0.04148506000638008, "timestamp": "2025-09-10 02:27:49.653670", "step": 1726, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:49.682319", "step": 1726, "epoch": 1 }, { "type": "loss", "content": 0.010827691294252872, "timestamp": "2025-09-10 02:27:49.683948", "step": 1727, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.712397", "step": 1727, "epoch": 1 }, { "type": "loss", "content": 0.04800831526517868, "timestamp": "2025-09-10 02:27:49.735690", "step": 1728, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.764917", "step": 1728, "epoch": 1 }, { "type": "loss", "content": 0.01778292842209339, "timestamp": "2025-09-10 02:27:49.766504", "step": 1729, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.795146", "step": 1729, "epoch": 1 }, { "type": "loss", "content": 0.03920115903019905, "timestamp": "2025-09-10 02:27:49.796779", "step": 1730, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:49.825249", "step": 1730, "epoch": 1 }, { "type": "loss", "content": 0.018084228038787842, "timestamp": "2025-09-10 02:27:49.826880", "step": 1731, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.856238", "step": 1731, "epoch": 1 }, { "type": "loss", "content": 0.04701170325279236, "timestamp": "2025-09-10 02:27:49.879640", "step": 1732, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:49.908641", "step": 1732, "epoch": 1 }, { "type": "loss", "content": 0.013462777249515057, "timestamp": "2025-09-10 02:27:49.910241", "step": 1733, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:49.938941", "step": 1733, "epoch": 1 }, { "type": "loss", "content": 0.0071736471727490425, "timestamp": "2025-09-10 02:27:49.940544", "step": 1734, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:49.968959", "step": 1734, "epoch": 1 }, { "type": "loss", "content": 0.030766667798161507, "timestamp": "2025-09-10 02:27:49.971789", "step": 1735, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.001703", "step": 1735, "epoch": 1 }, { "type": "loss", "content": 0.054909367114305496, "timestamp": "2025-09-10 02:27:50.025043", "step": 1736, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:50.054108", "step": 1736, "epoch": 1 }, { "type": "loss", "content": 0.03182930126786232, "timestamp": "2025-09-10 02:27:50.055971", "step": 1737, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.085056", "step": 1737, "epoch": 1 }, { "type": "loss", "content": 0.05137627571821213, "timestamp": "2025-09-10 02:27:50.086958", "step": 1738, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.115597", "step": 1738, "epoch": 1 }, { "type": "loss", "content": 0.056904930621385574, "timestamp": "2025-09-10 02:27:50.117207", "step": 1739, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.145728", "step": 1739, "epoch": 1 }, { "type": "loss", "content": 0.006250171922147274, "timestamp": "2025-09-10 02:27:50.168861", "step": 1740, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:50.197439", "step": 1740, "epoch": 1 }, { "type": "loss", "content": 0.03876912593841553, "timestamp": "2025-09-10 02:27:50.199040", "step": 1741, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.227679", "step": 1741, "epoch": 1 }, { "type": "loss", "content": 0.014502237550914288, "timestamp": "2025-09-10 02:27:50.229234", "step": 1742, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.257954", "step": 1742, "epoch": 1 }, { "type": "loss", "content": 0.01644100435078144, "timestamp": "2025-09-10 02:27:50.259506", "step": 1743, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.288121", "step": 1743, "epoch": 1 }, { "type": "loss", "content": 0.0441061295568943, "timestamp": "2025-09-10 02:27:50.311067", "step": 1744, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.340038", "step": 1744, "epoch": 1 }, { "type": "loss", "content": 0.005406923592090607, "timestamp": "2025-09-10 02:27:50.341728", "step": 1745, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.370447", "step": 1745, "epoch": 1 }, { "type": "loss", "content": 0.01910267397761345, "timestamp": "2025-09-10 02:27:50.371945", "step": 1746, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.400075", "step": 1746, "epoch": 1 }, { "type": "loss", "content": 0.04482027888298035, "timestamp": "2025-09-10 02:27:50.401461", "step": 1747, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.429669", "step": 1747, "epoch": 1 }, { "type": "loss", "content": 0.05291486158967018, "timestamp": "2025-09-10 02:27:50.452750", "step": 1748, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.481527", "step": 1748, "epoch": 1 }, { "type": "loss", "content": 0.008437646552920341, "timestamp": "2025-09-10 02:27:50.483234", "step": 1749, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.511909", "step": 1749, "epoch": 1 }, { "type": "loss", "content": 0.04366893693804741, "timestamp": "2025-09-10 02:27:50.513503", "step": 1750, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.542158", "step": 1750, "epoch": 1 }, { "type": "loss", "content": 0.02228725515305996, "timestamp": "2025-09-10 02:27:50.543781", "step": 1751, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:50.572903", "step": 1751, "epoch": 1 }, { "type": "loss", "content": 0.02205096371471882, "timestamp": "2025-09-10 02:27:50.595986", "step": 1752, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.625500", "step": 1752, "epoch": 1 }, { "type": "loss", "content": 0.02804812788963318, "timestamp": "2025-09-10 02:27:50.627233", "step": 1753, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.655999", "step": 1753, "epoch": 1 }, { "type": "loss", "content": 0.043668944388628006, "timestamp": "2025-09-10 02:27:50.657822", "step": 1754, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.686459", "step": 1754, "epoch": 1 }, { "type": "loss", "content": 0.03522314503788948, "timestamp": "2025-09-10 02:27:50.688046", "step": 1755, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.716668", "step": 1755, "epoch": 1 }, { "type": "loss", "content": 0.007187033537775278, "timestamp": "2025-09-10 02:27:50.739859", "step": 1756, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.768181", "step": 1756, "epoch": 1 }, { "type": "loss", "content": 0.048044055700302124, "timestamp": "2025-09-10 02:27:50.769532", "step": 1757, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.798599", "step": 1757, "epoch": 1 }, { "type": "loss", "content": 0.023681072518229485, "timestamp": "2025-09-10 02:27:50.799979", "step": 1758, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:50.828197", "step": 1758, "epoch": 1 }, { "type": "loss", "content": 0.043920863419771194, "timestamp": "2025-09-10 02:27:50.829783", "step": 1759, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.859057", "step": 1759, "epoch": 1 }, { "type": "loss", "content": 0.015966571867465973, "timestamp": "2025-09-10 02:27:50.882053", "step": 1760, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.911058", "step": 1760, "epoch": 1 }, { "type": "loss", "content": 0.024546165019273758, "timestamp": "2025-09-10 02:27:50.912429", "step": 1761, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.941299", "step": 1761, "epoch": 1 }, { "type": "loss", "content": 0.03937734290957451, "timestamp": "2025-09-10 02:27:50.943017", "step": 1762, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:50.972033", "step": 1762, "epoch": 1 }, { "type": "loss", "content": 0.015845147892832756, "timestamp": "2025-09-10 02:27:50.974035", "step": 1763, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.002798", "step": 1763, "epoch": 1 }, { "type": "loss", "content": 0.02144877426326275, "timestamp": "2025-09-10 02:27:51.026094", "step": 1764, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.054897", "step": 1764, "epoch": 1 }, { "type": "loss", "content": 0.019191885367035866, "timestamp": "2025-09-10 02:27:51.056748", "step": 1765, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:51.084970", "step": 1765, "epoch": 1 }, { "type": "loss", "content": 0.023099040612578392, "timestamp": "2025-09-10 02:27:51.086658", "step": 1766, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.115093", "step": 1766, "epoch": 1 }, { "type": "loss", "content": 0.022091463208198547, "timestamp": "2025-09-10 02:27:51.116947", "step": 1767, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.145495", "step": 1767, "epoch": 1 }, { "type": "loss", "content": 0.018582088872790337, "timestamp": "2025-09-10 02:27:51.168999", "step": 1768, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.197556", "step": 1768, "epoch": 1 }, { "type": "loss", "content": 0.02827291376888752, "timestamp": "2025-09-10 02:27:51.199126", "step": 1769, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.227635", "step": 1769, "epoch": 1 }, { "type": "loss", "content": 0.03188592568039894, "timestamp": "2025-09-10 02:27:51.229246", "step": 1770, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.258014", "step": 1770, "epoch": 1 }, { "type": "loss", "content": 0.03314082697033882, "timestamp": "2025-09-10 02:27:51.259780", "step": 1771, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.288468", "step": 1771, "epoch": 1 }, { "type": "loss", "content": 0.022767921909689903, "timestamp": "2025-09-10 02:27:51.311705", "step": 1772, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.340467", "step": 1772, "epoch": 1 }, { "type": "loss", "content": 0.03807010129094124, "timestamp": "2025-09-10 02:27:51.342367", "step": 1773, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.370850", "step": 1773, "epoch": 1 }, { "type": "loss", "content": 0.04123767092823982, "timestamp": "2025-09-10 02:27:51.372394", "step": 1774, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.401036", "step": 1774, "epoch": 1 }, { "type": "loss", "content": 0.024406859651207924, "timestamp": "2025-09-10 02:27:51.402413", "step": 1775, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:51.430962", "step": 1775, "epoch": 1 }, { "type": "loss", "content": 0.019260989502072334, "timestamp": "2025-09-10 02:27:51.455116", "step": 1776, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.484089", "step": 1776, "epoch": 1 }, { "type": "loss", "content": 0.014728260226547718, "timestamp": "2025-09-10 02:27:51.485497", "step": 1777, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.514367", "step": 1777, "epoch": 1 }, { "type": "loss", "content": 0.016539910808205605, "timestamp": "2025-09-10 02:27:51.515779", "step": 1778, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.544624", "step": 1778, "epoch": 1 }, { "type": "loss", "content": 0.048096511512994766, "timestamp": "2025-09-10 02:27:51.546124", "step": 1779, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.574792", "step": 1779, "epoch": 1 }, { "type": "loss", "content": 0.0534992590546608, "timestamp": "2025-09-10 02:27:51.597820", "step": 1780, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.626356", "step": 1780, "epoch": 1 }, { "type": "loss", "content": 0.00945852417498827, "timestamp": "2025-09-10 02:27:51.628257", "step": 1781, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:51.657167", "step": 1781, "epoch": 1 }, { "type": "loss", "content": 0.027953948825597763, "timestamp": "2025-09-10 02:27:51.659144", "step": 1782, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.687641", "step": 1782, "epoch": 1 }, { "type": "loss", "content": 0.03516707941889763, "timestamp": "2025-09-10 02:27:51.689466", "step": 1783, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.717812", "step": 1783, "epoch": 1 }, { "type": "loss", "content": 0.033107005059719086, "timestamp": "2025-09-10 02:27:51.741063", "step": 1784, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:51.770018", "step": 1784, "epoch": 1 }, { "type": "loss", "content": 0.0065910592675209045, "timestamp": "2025-09-10 02:27:51.771652", "step": 1785, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:51.799944", "step": 1785, "epoch": 1 }, { "type": "loss", "content": 0.032838717103004456, "timestamp": "2025-09-10 02:27:51.801570", "step": 1786, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.830060", "step": 1786, "epoch": 1 }, { "type": "loss", "content": 0.0056396001018583775, "timestamp": "2025-09-10 02:27:51.831656", "step": 1787, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.860033", "step": 1787, "epoch": 1 }, { "type": "loss", "content": 0.00763358548283577, "timestamp": "2025-09-10 02:27:51.883250", "step": 1788, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.911800", "step": 1788, "epoch": 1 }, { "type": "loss", "content": 0.0351736806333065, "timestamp": "2025-09-10 02:27:51.913371", "step": 1789, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:51.942061", "step": 1789, "epoch": 1 }, { "type": "loss", "content": 0.023263007402420044, "timestamp": "2025-09-10 02:27:51.943778", "step": 1790, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:51.972170", "step": 1790, "epoch": 1 }, { "type": "loss", "content": 0.014233228750526905, "timestamp": "2025-09-10 02:27:51.973824", "step": 1791, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:52.002715", "step": 1791, "epoch": 1 }, { "type": "loss", "content": 0.012082560919225216, "timestamp": "2025-09-10 02:27:52.025890", "step": 1792, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.054958", "step": 1792, "epoch": 1 }, { "type": "loss", "content": 0.03389746695756912, "timestamp": "2025-09-10 02:27:52.056834", "step": 1793, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.092779", "step": 1793, "epoch": 1 }, { "type": "loss", "content": 0.021390317007899284, "timestamp": "2025-09-10 02:27:52.094305", "step": 1794, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.123961", "step": 1794, "epoch": 1 }, { "type": "loss", "content": 0.011533044278621674, "timestamp": "2025-09-10 02:27:52.125544", "step": 1795, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.153627", "step": 1795, "epoch": 1 }, { "type": "loss", "content": 0.04315054789185524, "timestamp": "2025-09-10 02:27:52.177096", "step": 1796, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.205618", "step": 1796, "epoch": 1 }, { "type": "loss", "content": 0.04506099969148636, "timestamp": "2025-09-10 02:27:52.207112", "step": 1797, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.236476", "step": 1797, "epoch": 1 }, { "type": "loss", "content": 0.023977842181921005, "timestamp": "2025-09-10 02:27:52.238015", "step": 1798, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:52.266445", "step": 1798, "epoch": 1 }, { "type": "loss", "content": 0.04149536415934563, "timestamp": "2025-09-10 02:27:52.267822", "step": 1799, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.296197", "step": 1799, "epoch": 1 }, { "type": "loss", "content": 0.016618778929114342, "timestamp": "2025-09-10 02:27:52.319183", "step": 1800, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.347810", "step": 1800, "epoch": 1 }, { "type": "loss", "content": 0.028144029900431633, "timestamp": "2025-09-10 02:27:52.349098", "step": 1801, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.377491", "step": 1801, "epoch": 1 }, { "type": "loss", "content": 0.006141290534287691, "timestamp": "2025-09-10 02:27:52.378890", "step": 1802, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.406995", "step": 1802, "epoch": 1 }, { "type": "loss", "content": 0.013461337424814701, "timestamp": "2025-09-10 02:27:52.408834", "step": 1803, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:52.436730", "step": 1803, "epoch": 1 }, { "type": "loss", "content": 0.015965865924954414, "timestamp": "2025-09-10 02:27:52.460006", "step": 1804, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:52.488941", "step": 1804, "epoch": 1 }, { "type": "loss", "content": 0.02222426049411297, "timestamp": "2025-09-10 02:27:52.490695", "step": 1805, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.519093", "step": 1805, "epoch": 1 }, { "type": "loss", "content": 0.02876662276685238, "timestamp": "2025-09-10 02:27:52.520877", "step": 1806, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.549576", "step": 1806, "epoch": 1 }, { "type": "loss", "content": 0.0073358905501663685, "timestamp": "2025-09-10 02:27:52.551237", "step": 1807, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.579334", "step": 1807, "epoch": 1 }, { "type": "loss", "content": 0.09105312824249268, "timestamp": "2025-09-10 02:27:52.602652", "step": 1808, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.630955", "step": 1808, "epoch": 1 }, { "type": "loss", "content": 0.03719841688871384, "timestamp": "2025-09-10 02:27:52.632810", "step": 1809, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.661080", "step": 1809, "epoch": 1 }, { "type": "loss", "content": 0.009422372095286846, "timestamp": "2025-09-10 02:27:52.662485", "step": 1810, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.691205", "step": 1810, "epoch": 1 }, { "type": "loss", "content": 0.045731209218502045, "timestamp": "2025-09-10 02:27:52.692998", "step": 1811, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.721224", "step": 1811, "epoch": 1 }, { "type": "loss", "content": 0.03382372483611107, "timestamp": "2025-09-10 02:27:52.744539", "step": 1812, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.773218", "step": 1812, "epoch": 1 }, { "type": "loss", "content": 0.01654665358364582, "timestamp": "2025-09-10 02:27:52.774861", "step": 1813, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.803448", "step": 1813, "epoch": 1 }, { "type": "loss", "content": 0.07755956798791885, "timestamp": "2025-09-10 02:27:52.805141", "step": 1814, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.833581", "step": 1814, "epoch": 1 }, { "type": "loss", "content": 0.09635764360427856, "timestamp": "2025-09-10 02:27:52.835169", "step": 1815, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.863702", "step": 1815, "epoch": 1 }, { "type": "loss", "content": 0.013378841802477837, "timestamp": "2025-09-10 02:27:52.886950", "step": 1816, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.916031", "step": 1816, "epoch": 1 }, { "type": "loss", "content": 0.023112650960683823, "timestamp": "2025-09-10 02:27:52.917431", "step": 1817, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:52.945574", "step": 1817, "epoch": 1 }, { "type": "loss", "content": 0.018076254054903984, "timestamp": "2025-09-10 02:27:52.947053", "step": 1818, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:52.975556", "step": 1818, "epoch": 1 }, { "type": "loss", "content": 0.010910548269748688, "timestamp": "2025-09-10 02:27:52.977176", "step": 1819, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:53.005951", "step": 1819, "epoch": 1 }, { "type": "loss", "content": 0.028995469212532043, "timestamp": "2025-09-10 02:27:53.029178", "step": 1820, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:53.057949", "step": 1820, "epoch": 1 }, { "type": "loss", "content": 0.05423533916473389, "timestamp": "2025-09-10 02:27:53.059432", "step": 1821, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:53.087928", "step": 1821, "epoch": 1 }, { "type": "loss", "content": 0.016812141984701157, "timestamp": "2025-09-10 02:27:53.089337", "step": 1822, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:53.117903", "step": 1822, "epoch": 1 }, { "type": "loss", "content": 0.004353975411504507, "timestamp": "2025-09-10 02:27:53.119348", "step": 1823, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:53.148327", "step": 1823, "epoch": 1 }, { "type": "loss", "content": 0.02070428803563118, "timestamp": "2025-09-10 02:27:53.171449", "step": 1824, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:27:55.044426", "step": 1824, "epoch": 1 }, { "type": "pplx", "content": 2774156.6199907036, "timestamp": "2025-09-10 02:27:55.046119", "step": 1824, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:55.074027", "step": 1824, "epoch": 1 }, { "type": "loss", "content": 0.04381697624921799, "timestamp": "2025-09-10 02:27:55.075721", "step": 1825, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.104334", "step": 1825, "epoch": 1 }, { "type": "loss", "content": 0.015221195295453072, "timestamp": "2025-09-10 02:27:55.111881", "step": 1826, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.142339", "step": 1826, "epoch": 1 }, { "type": "loss", "content": 0.018304746598005295, "timestamp": "2025-09-10 02:27:55.143992", "step": 1827, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.172284", "step": 1827, "epoch": 1 }, { "type": "loss", "content": 0.005829904694110155, "timestamp": "2025-09-10 02:27:55.195380", "step": 1828, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.226280", "step": 1828, "epoch": 1 }, { "type": "loss", "content": 0.08773591369390488, "timestamp": "2025-09-10 02:27:55.227891", "step": 1829, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.256005", "step": 1829, "epoch": 1 }, { "type": "loss", "content": 0.06778901070356369, "timestamp": "2025-09-10 02:27:55.259664", "step": 1830, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.291290", "step": 1830, "epoch": 1 }, { "type": "loss", "content": 0.01809806562960148, "timestamp": "2025-09-10 02:27:55.293003", "step": 1831, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.320927", "step": 1831, "epoch": 1 }, { "type": "loss", "content": 0.035624559968709946, "timestamp": "2025-09-10 02:27:55.343892", "step": 1832, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.372533", "step": 1832, "epoch": 1 }, { "type": "loss", "content": 0.02435780130326748, "timestamp": "2025-09-10 02:27:55.374215", "step": 1833, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.402562", "step": 1833, "epoch": 1 }, { "type": "loss", "content": 0.028359994292259216, "timestamp": "2025-09-10 02:27:55.405247", "step": 1834, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.436295", "step": 1834, "epoch": 1 }, { "type": "loss", "content": 0.01486204657703638, "timestamp": "2025-09-10 02:27:55.438308", "step": 1835, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.467550", "step": 1835, "epoch": 1 }, { "type": "loss", "content": 0.013306444510817528, "timestamp": "2025-09-10 02:27:55.490808", "step": 1836, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.519785", "step": 1836, "epoch": 1 }, { "type": "loss", "content": 0.021247971802949905, "timestamp": "2025-09-10 02:27:55.521520", "step": 1837, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.549764", "step": 1837, "epoch": 1 }, { "type": "loss", "content": 0.012678244151175022, "timestamp": "2025-09-10 02:27:55.551521", "step": 1838, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.580482", "step": 1838, "epoch": 1 }, { "type": "loss", "content": 0.07287485152482986, "timestamp": "2025-09-10 02:27:55.582110", "step": 1839, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:55.611036", "step": 1839, "epoch": 1 }, { "type": "loss", "content": 0.014401237480342388, "timestamp": "2025-09-10 02:27:55.634267", "step": 1840, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.662744", "step": 1840, "epoch": 1 }, { "type": "loss", "content": 0.037539489567279816, "timestamp": "2025-09-10 02:27:55.664501", "step": 1841, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:55.693288", "step": 1841, "epoch": 1 }, { "type": "loss", "content": 0.032391078770160675, "timestamp": "2025-09-10 02:27:55.694975", "step": 1842, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:55.723259", "step": 1842, "epoch": 1 }, { "type": "loss", "content": 0.005160854198038578, "timestamp": "2025-09-10 02:27:55.725047", "step": 1843, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:55.753635", "step": 1843, "epoch": 1 }, { "type": "loss", "content": 0.03755679354071617, "timestamp": "2025-09-10 02:27:55.776834", "step": 1844, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.805580", "step": 1844, "epoch": 1 }, { "type": "loss", "content": 0.06378152221441269, "timestamp": "2025-09-10 02:27:55.807021", "step": 1845, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.835711", "step": 1845, "epoch": 1 }, { "type": "loss", "content": 0.008414468728005886, "timestamp": "2025-09-10 02:27:55.837411", "step": 1846, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.866195", "step": 1846, "epoch": 1 }, { "type": "loss", "content": 0.04518602043390274, "timestamp": "2025-09-10 02:27:55.867807", "step": 1847, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:55.896849", "step": 1847, "epoch": 1 }, { "type": "loss", "content": 0.03599895164370537, "timestamp": "2025-09-10 02:27:55.920171", "step": 1848, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:55.948396", "step": 1848, "epoch": 1 }, { "type": "loss", "content": 0.03463702276349068, "timestamp": "2025-09-10 02:27:55.949980", "step": 1849, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:55.977963", "step": 1849, "epoch": 1 }, { "type": "loss", "content": 0.05188743397593498, "timestamp": "2025-09-10 02:27:55.979559", "step": 1850, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.008450", "step": 1850, "epoch": 1 }, { "type": "loss", "content": 0.04317900538444519, "timestamp": "2025-09-10 02:27:56.009918", "step": 1851, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.038346", "step": 1851, "epoch": 1 }, { "type": "loss", "content": 0.03403066471219063, "timestamp": "2025-09-10 02:27:56.061331", "step": 1852, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.089500", "step": 1852, "epoch": 1 }, { "type": "loss", "content": 0.016021737828850746, "timestamp": "2025-09-10 02:27:56.091131", "step": 1853, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.119634", "step": 1853, "epoch": 1 }, { "type": "loss", "content": 0.053274061530828476, "timestamp": "2025-09-10 02:27:56.121249", "step": 1854, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.149369", "step": 1854, "epoch": 1 }, { "type": "loss", "content": 0.027471955865621567, "timestamp": "2025-09-10 02:27:56.151139", "step": 1855, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.179492", "step": 1855, "epoch": 1 }, { "type": "loss", "content": 0.03777465969324112, "timestamp": "2025-09-10 02:27:56.202586", "step": 1856, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.231535", "step": 1856, "epoch": 1 }, { "type": "loss", "content": 0.01911143772304058, "timestamp": "2025-09-10 02:27:56.233235", "step": 1857, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.261914", "step": 1857, "epoch": 1 }, { "type": "loss", "content": 0.008062859997153282, "timestamp": "2025-09-10 02:27:56.263870", "step": 1858, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.292323", "step": 1858, "epoch": 1 }, { "type": "loss", "content": 0.03436744958162308, "timestamp": "2025-09-10 02:27:56.294145", "step": 1859, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.322601", "step": 1859, "epoch": 1 }, { "type": "loss", "content": 0.00785265862941742, "timestamp": "2025-09-10 02:27:56.345787", "step": 1860, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.374393", "step": 1860, "epoch": 1 }, { "type": "loss", "content": 0.05875316634774208, "timestamp": "2025-09-10 02:27:56.376141", "step": 1861, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.404486", "step": 1861, "epoch": 1 }, { "type": "loss", "content": 0.014822714030742645, "timestamp": "2025-09-10 02:27:56.406234", "step": 1862, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.434078", "step": 1862, "epoch": 1 }, { "type": "loss", "content": 0.07236051559448242, "timestamp": "2025-09-10 02:27:56.436222", "step": 1863, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.464640", "step": 1863, "epoch": 1 }, { "type": "loss", "content": 0.029560577124357224, "timestamp": "2025-09-10 02:27:56.487937", "step": 1864, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.516429", "step": 1864, "epoch": 1 }, { "type": "loss", "content": 0.04660656675696373, "timestamp": "2025-09-10 02:27:56.518046", "step": 1865, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.546153", "step": 1865, "epoch": 1 }, { "type": "loss", "content": 0.0362875834107399, "timestamp": "2025-09-10 02:27:56.548051", "step": 1866, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:56.576504", "step": 1866, "epoch": 1 }, { "type": "loss", "content": 0.011777542531490326, "timestamp": "2025-09-10 02:27:56.578155", "step": 1867, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.606576", "step": 1867, "epoch": 1 }, { "type": "loss", "content": 0.022310929372906685, "timestamp": "2025-09-10 02:27:56.629552", "step": 1868, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.658112", "step": 1868, "epoch": 1 }, { "type": "loss", "content": 0.02191806770861149, "timestamp": "2025-09-10 02:27:56.659698", "step": 1869, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.687668", "step": 1869, "epoch": 1 }, { "type": "loss", "content": 0.036225225776433945, "timestamp": "2025-09-10 02:27:56.689096", "step": 1870, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.717260", "step": 1870, "epoch": 1 }, { "type": "loss", "content": 0.013109169900417328, "timestamp": "2025-09-10 02:27:56.718550", "step": 1871, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:56.746228", "step": 1871, "epoch": 1 }, { "type": "loss", "content": 0.02598562464118004, "timestamp": "2025-09-10 02:27:56.769271", "step": 1872, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:56.797699", "step": 1872, "epoch": 1 }, { "type": "loss", "content": 0.046570923179388046, "timestamp": "2025-09-10 02:27:56.799236", "step": 1873, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.827812", "step": 1873, "epoch": 1 }, { "type": "loss", "content": 0.023213813081383705, "timestamp": "2025-09-10 02:27:56.829290", "step": 1874, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:56.857533", "step": 1874, "epoch": 1 }, { "type": "loss", "content": 0.016777101904153824, "timestamp": "2025-09-10 02:27:56.859059", "step": 1875, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:56.887138", "step": 1875, "epoch": 1 }, { "type": "loss", "content": 0.017548279836773872, "timestamp": "2025-09-10 02:27:56.910147", "step": 1876, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.937931", "step": 1876, "epoch": 1 }, { "type": "loss", "content": 0.013116302900016308, "timestamp": "2025-09-10 02:27:56.939598", "step": 1877, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:56.967669", "step": 1877, "epoch": 1 }, { "type": "loss", "content": 0.04197695478796959, "timestamp": "2025-09-10 02:27:56.969306", "step": 1878, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:56.997603", "step": 1878, "epoch": 1 }, { "type": "loss", "content": 0.021345878019928932, "timestamp": "2025-09-10 02:27:56.999307", "step": 1879, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.027495", "step": 1879, "epoch": 1 }, { "type": "loss", "content": 0.021132279187440872, "timestamp": "2025-09-10 02:27:57.050416", "step": 1880, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.078538", "step": 1880, "epoch": 1 }, { "type": "loss", "content": 0.018463974818587303, "timestamp": "2025-09-10 02:27:57.079967", "step": 1881, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.108136", "step": 1881, "epoch": 1 }, { "type": "loss", "content": 0.009572668001055717, "timestamp": "2025-09-10 02:27:57.109593", "step": 1882, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.137622", "step": 1882, "epoch": 1 }, { "type": "loss", "content": 0.026923833414912224, "timestamp": "2025-09-10 02:27:57.139341", "step": 1883, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.167719", "step": 1883, "epoch": 1 }, { "type": "loss", "content": 0.015519418753683567, "timestamp": "2025-09-10 02:27:57.190900", "step": 1884, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.219265", "step": 1884, "epoch": 1 }, { "type": "loss", "content": 0.03241514042019844, "timestamp": "2025-09-10 02:27:57.220852", "step": 1885, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.248956", "step": 1885, "epoch": 1 }, { "type": "loss", "content": 0.013597256503999233, "timestamp": "2025-09-10 02:27:57.250464", "step": 1886, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.278928", "step": 1886, "epoch": 1 }, { "type": "loss", "content": 0.04358609393239021, "timestamp": "2025-09-10 02:27:57.280393", "step": 1887, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:57.308394", "step": 1887, "epoch": 1 }, { "type": "loss", "content": 0.040268637239933014, "timestamp": "2025-09-10 02:27:57.331592", "step": 1888, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.359754", "step": 1888, "epoch": 1 }, { "type": "loss", "content": 0.05240333825349808, "timestamp": "2025-09-10 02:27:57.361378", "step": 1889, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.389827", "step": 1889, "epoch": 1 }, { "type": "loss", "content": 0.007719197776168585, "timestamp": "2025-09-10 02:27:57.391255", "step": 1890, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:57.419479", "step": 1890, "epoch": 1 }, { "type": "loss", "content": 0.03898076340556145, "timestamp": "2025-09-10 02:27:57.420754", "step": 1891, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.448447", "step": 1891, "epoch": 1 }, { "type": "loss", "content": 0.06327030807733536, "timestamp": "2025-09-10 02:27:57.471573", "step": 1892, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.500236", "step": 1892, "epoch": 1 }, { "type": "loss", "content": 0.018281977623701096, "timestamp": "2025-09-10 02:27:57.501908", "step": 1893, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.530238", "step": 1893, "epoch": 1 }, { "type": "loss", "content": 0.03632773086428642, "timestamp": "2025-09-10 02:27:57.531973", "step": 1894, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.559938", "step": 1894, "epoch": 1 }, { "type": "loss", "content": 0.013442902825772762, "timestamp": "2025-09-10 02:27:57.561377", "step": 1895, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.589213", "step": 1895, "epoch": 1 }, { "type": "loss", "content": 0.0278884656727314, "timestamp": "2025-09-10 02:27:57.612139", "step": 1896, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.639805", "step": 1896, "epoch": 1 }, { "type": "loss", "content": 0.021757736802101135, "timestamp": "2025-09-10 02:27:57.641239", "step": 1897, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.669692", "step": 1897, "epoch": 1 }, { "type": "loss", "content": 0.015394957736134529, "timestamp": "2025-09-10 02:27:57.671313", "step": 1898, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:57.699685", "step": 1898, "epoch": 1 }, { "type": "loss", "content": 0.046225737780332565, "timestamp": "2025-09-10 02:27:57.701354", "step": 1899, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.730185", "step": 1899, "epoch": 1 }, { "type": "loss", "content": 0.026236888021230698, "timestamp": "2025-09-10 02:27:57.753285", "step": 1900, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.781986", "step": 1900, "epoch": 1 }, { "type": "loss", "content": 0.04182019457221031, "timestamp": "2025-09-10 02:27:57.783790", "step": 1901, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.811985", "step": 1901, "epoch": 1 }, { "type": "loss", "content": 0.04555434733629227, "timestamp": "2025-09-10 02:27:57.813453", "step": 1902, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.841323", "step": 1902, "epoch": 1 }, { "type": "loss", "content": 0.015747955068945885, "timestamp": "2025-09-10 02:27:57.842793", "step": 1903, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:27:57.870995", "step": 1903, "epoch": 1 }, { "type": "loss", "content": 0.01313038356602192, "timestamp": "2025-09-10 02:27:57.893997", "step": 1904, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.922386", "step": 1904, "epoch": 1 }, { "type": "loss", "content": 0.02328430488705635, "timestamp": "2025-09-10 02:27:57.923895", "step": 1905, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:57.951902", "step": 1905, "epoch": 1 }, { "type": "loss", "content": 0.0312882624566555, "timestamp": "2025-09-10 02:27:57.956924", "step": 1906, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:57.985419", "step": 1906, "epoch": 1 }, { "type": "loss", "content": 0.009373379871249199, "timestamp": "2025-09-10 02:27:57.987010", "step": 1907, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.015272", "step": 1907, "epoch": 1 }, { "type": "loss", "content": 0.037006132304668427, "timestamp": "2025-09-10 02:27:58.039093", "step": 1908, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:58.067188", "step": 1908, "epoch": 1 }, { "type": "loss", "content": 0.05433737486600876, "timestamp": "2025-09-10 02:27:58.068758", "step": 1909, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:58.096849", "step": 1909, "epoch": 1 }, { "type": "loss", "content": 0.023593587800860405, "timestamp": "2025-09-10 02:27:58.098139", "step": 1910, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.126323", "step": 1910, "epoch": 1 }, { "type": "loss", "content": 0.02516046352684498, "timestamp": "2025-09-10 02:27:58.127701", "step": 1911, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.156830", "step": 1911, "epoch": 1 }, { "type": "loss", "content": 0.017987674102187157, "timestamp": "2025-09-10 02:27:58.179821", "step": 1912, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.207989", "step": 1912, "epoch": 1 }, { "type": "loss", "content": 0.015304679982364178, "timestamp": "2025-09-10 02:27:58.209568", "step": 1913, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.237308", "step": 1913, "epoch": 1 }, { "type": "loss", "content": 0.007738722953945398, "timestamp": "2025-09-10 02:27:58.238958", "step": 1914, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.267360", "step": 1914, "epoch": 1 }, { "type": "loss", "content": 0.013014032505452633, "timestamp": "2025-09-10 02:27:58.268758", "step": 1915, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.296747", "step": 1915, "epoch": 1 }, { "type": "loss", "content": 0.04582571983337402, "timestamp": "2025-09-10 02:27:58.322292", "step": 1916, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.350059", "step": 1916, "epoch": 1 }, { "type": "loss", "content": 0.013117613270878792, "timestamp": "2025-09-10 02:27:58.351489", "step": 1917, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:58.381128", "step": 1917, "epoch": 1 }, { "type": "loss", "content": 0.004100994672626257, "timestamp": "2025-09-10 02:27:58.382574", "step": 1918, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.410602", "step": 1918, "epoch": 1 }, { "type": "loss", "content": 0.020396525040268898, "timestamp": "2025-09-10 02:27:58.411818", "step": 1919, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.439879", "step": 1919, "epoch": 1 }, { "type": "loss", "content": 0.011074303649365902, "timestamp": "2025-09-10 02:27:58.462922", "step": 1920, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.491566", "step": 1920, "epoch": 1 }, { "type": "loss", "content": 0.05970010533928871, "timestamp": "2025-09-10 02:27:58.493194", "step": 1921, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:58.521290", "step": 1921, "epoch": 1 }, { "type": "loss", "content": 0.032971110194921494, "timestamp": "2025-09-10 02:27:58.522764", "step": 1922, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.551138", "step": 1922, "epoch": 1 }, { "type": "loss", "content": 0.07221423834562302, "timestamp": "2025-09-10 02:27:58.552607", "step": 1923, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.580543", "step": 1923, "epoch": 1 }, { "type": "loss", "content": 0.09146525710821152, "timestamp": "2025-09-10 02:27:58.603608", "step": 1924, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.632006", "step": 1924, "epoch": 1 }, { "type": "loss", "content": 0.029159855097532272, "timestamp": "2025-09-10 02:27:58.633460", "step": 1925, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.661485", "step": 1925, "epoch": 1 }, { "type": "loss", "content": 0.013702225871384144, "timestamp": "2025-09-10 02:27:58.666154", "step": 1926, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.702390", "step": 1926, "epoch": 1 }, { "type": "loss", "content": 0.0874093621969223, "timestamp": "2025-09-10 02:27:58.703827", "step": 1927, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.738075", "step": 1927, "epoch": 1 }, { "type": "loss", "content": 0.03918251767754555, "timestamp": "2025-09-10 02:27:58.761087", "step": 1928, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:58.789361", "step": 1928, "epoch": 1 }, { "type": "loss", "content": 0.05587069317698479, "timestamp": "2025-09-10 02:27:58.790904", "step": 1929, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.829615", "step": 1929, "epoch": 1 }, { "type": "loss", "content": 0.0371037982404232, "timestamp": "2025-09-10 02:27:58.833946", "step": 1930, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.862607", "step": 1930, "epoch": 1 }, { "type": "loss", "content": 0.048507921397686005, "timestamp": "2025-09-10 02:27:58.864200", "step": 1931, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.892387", "step": 1931, "epoch": 1 }, { "type": "loss", "content": 0.024592606350779533, "timestamp": "2025-09-10 02:27:58.915377", "step": 1932, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.943879", "step": 1932, "epoch": 1 }, { "type": "loss", "content": 0.008353027515113354, "timestamp": "2025-09-10 02:27:58.945380", "step": 1933, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:58.974956", "step": 1933, "epoch": 1 }, { "type": "loss", "content": 0.03610652685165405, "timestamp": "2025-09-10 02:27:58.976610", "step": 1934, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.004927", "step": 1934, "epoch": 1 }, { "type": "loss", "content": 0.04270109534263611, "timestamp": "2025-09-10 02:27:59.006667", "step": 1935, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.034713", "step": 1935, "epoch": 1 }, { "type": "loss", "content": 0.09751912206411362, "timestamp": "2025-09-10 02:27:59.057936", "step": 1936, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.086197", "step": 1936, "epoch": 1 }, { "type": "loss", "content": 0.025234397500753403, "timestamp": "2025-09-10 02:27:59.088059", "step": 1937, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.115999", "step": 1937, "epoch": 1 }, { "type": "loss", "content": 0.018462875857949257, "timestamp": "2025-09-10 02:27:59.117593", "step": 1938, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.145324", "step": 1938, "epoch": 1 }, { "type": "loss", "content": 0.056947946548461914, "timestamp": "2025-09-10 02:27:59.146835", "step": 1939, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.174887", "step": 1939, "epoch": 1 }, { "type": "loss", "content": 0.04623355343937874, "timestamp": "2025-09-10 02:27:59.197854", "step": 1940, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.226184", "step": 1940, "epoch": 1 }, { "type": "loss", "content": 0.01078362949192524, "timestamp": "2025-09-10 02:27:59.227447", "step": 1941, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.255446", "step": 1941, "epoch": 1 }, { "type": "loss", "content": 0.059156108647584915, "timestamp": "2025-09-10 02:27:59.256934", "step": 1942, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.285262", "step": 1942, "epoch": 1 }, { "type": "loss", "content": 0.030599039047956467, "timestamp": "2025-09-10 02:27:59.287771", "step": 1943, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.320679", "step": 1943, "epoch": 1 }, { "type": "loss", "content": 0.017089087516069412, "timestamp": "2025-09-10 02:27:59.343468", "step": 1944, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.371557", "step": 1944, "epoch": 1 }, { "type": "loss", "content": 0.04193213954567909, "timestamp": "2025-09-10 02:27:59.372992", "step": 1945, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.400821", "step": 1945, "epoch": 1 }, { "type": "loss", "content": 0.028696786612272263, "timestamp": "2025-09-10 02:27:59.402541", "step": 1946, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.430975", "step": 1946, "epoch": 1 }, { "type": "loss", "content": 0.015457267872989178, "timestamp": "2025-09-10 02:27:59.432572", "step": 1947, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.460720", "step": 1947, "epoch": 1 }, { "type": "loss", "content": 0.05320663005113602, "timestamp": "2025-09-10 02:27:59.483802", "step": 1948, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.512074", "step": 1948, "epoch": 1 }, { "type": "loss", "content": 0.014334471896290779, "timestamp": "2025-09-10 02:27:59.513497", "step": 1949, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.541383", "step": 1949, "epoch": 1 }, { "type": "loss", "content": 0.021078793331980705, "timestamp": "2025-09-10 02:27:59.542952", "step": 1950, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:59.571261", "step": 1950, "epoch": 1 }, { "type": "loss", "content": 0.04999444633722305, "timestamp": "2025-09-10 02:27:59.572992", "step": 1951, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.601354", "step": 1951, "epoch": 1 }, { "type": "loss", "content": 0.02752426452934742, "timestamp": "2025-09-10 02:27:59.624668", "step": 1952, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.653011", "step": 1952, "epoch": 1 }, { "type": "loss", "content": 0.019825072959065437, "timestamp": "2025-09-10 02:27:59.654564", "step": 1953, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.682549", "step": 1953, "epoch": 1 }, { "type": "loss", "content": 0.02840333990752697, "timestamp": "2025-09-10 02:27:59.684830", "step": 1954, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:27:59.715603", "step": 1954, "epoch": 1 }, { "type": "loss", "content": 0.052610281854867935, "timestamp": "2025-09-10 02:27:59.719938", "step": 1955, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.748377", "step": 1955, "epoch": 1 }, { "type": "loss", "content": 0.03353895619511604, "timestamp": "2025-09-10 02:27:59.771857", "step": 1956, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.802452", "step": 1956, "epoch": 1 }, { "type": "loss", "content": 0.02799171581864357, "timestamp": "2025-09-10 02:27:59.804953", "step": 1957, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.834426", "step": 1957, "epoch": 1 }, { "type": "loss", "content": 0.06994835287332535, "timestamp": "2025-09-10 02:27:59.835942", "step": 1958, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:27:59.863918", "step": 1958, "epoch": 1 }, { "type": "loss", "content": 0.019334720447659492, "timestamp": "2025-09-10 02:27:59.865655", "step": 1959, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.893692", "step": 1959, "epoch": 1 }, { "type": "loss", "content": 0.04380827397108078, "timestamp": "2025-09-10 02:27:59.916696", "step": 1960, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.945266", "step": 1960, "epoch": 1 }, { "type": "loss", "content": 0.0397782102227211, "timestamp": "2025-09-10 02:27:59.946693", "step": 1961, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:27:59.974745", "step": 1961, "epoch": 1 }, { "type": "loss", "content": 0.016844620928168297, "timestamp": "2025-09-10 02:27:59.976314", "step": 1962, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.004609", "step": 1962, "epoch": 1 }, { "type": "loss", "content": 0.027142390608787537, "timestamp": "2025-09-10 02:28:00.006237", "step": 1963, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:00.035147", "step": 1963, "epoch": 1 }, { "type": "loss", "content": 0.030943524092435837, "timestamp": "2025-09-10 02:28:00.058226", "step": 1964, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.086477", "step": 1964, "epoch": 1 }, { "type": "loss", "content": 0.03292815759778023, "timestamp": "2025-09-10 02:28:00.088051", "step": 1965, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.115960", "step": 1965, "epoch": 1 }, { "type": "loss", "content": 0.050953131169080734, "timestamp": "2025-09-10 02:28:00.117706", "step": 1966, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.145912", "step": 1966, "epoch": 1 }, { "type": "loss", "content": 0.023091718554496765, "timestamp": "2025-09-10 02:28:00.147427", "step": 1967, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.175591", "step": 1967, "epoch": 1 }, { "type": "loss", "content": 0.021349238231778145, "timestamp": "2025-09-10 02:28:00.198578", "step": 1968, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.227110", "step": 1968, "epoch": 1 }, { "type": "loss", "content": 0.03239194676280022, "timestamp": "2025-09-10 02:28:00.228645", "step": 1969, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.256590", "step": 1969, "epoch": 1 }, { "type": "loss", "content": 0.029955726116895676, "timestamp": "2025-09-10 02:28:00.258169", "step": 1970, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.286403", "step": 1970, "epoch": 1 }, { "type": "loss", "content": 0.01877775602042675, "timestamp": "2025-09-10 02:28:00.287901", "step": 1971, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.316128", "step": 1971, "epoch": 1 }, { "type": "loss", "content": 0.025350505486130714, "timestamp": "2025-09-10 02:28:00.339126", "step": 1972, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.367175", "step": 1972, "epoch": 1 }, { "type": "loss", "content": 0.04922991245985031, "timestamp": "2025-09-10 02:28:00.368590", "step": 1973, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.396631", "step": 1973, "epoch": 1 }, { "type": "loss", "content": 0.02344900369644165, "timestamp": "2025-09-10 02:28:00.398219", "step": 1974, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.429741", "step": 1974, "epoch": 1 }, { "type": "loss", "content": 0.04036623612046242, "timestamp": "2025-09-10 02:28:00.431013", "step": 1975, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:00.458531", "step": 1975, "epoch": 1 }, { "type": "loss", "content": 0.028169851750135422, "timestamp": "2025-09-10 02:28:00.481467", "step": 1976, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:28:02.327132", "step": 1976, "epoch": 1 }, { "type": "pplx", "content": 2335998.333224945, "timestamp": "2025-09-10 02:28:02.328617", "step": 1976, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:02.355782", "step": 1976, "epoch": 1 }, { "type": "loss", "content": 0.04719608649611473, "timestamp": "2025-09-10 02:28:02.357301", "step": 1977, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.386114", "step": 1977, "epoch": 1 }, { "type": "loss", "content": 0.019742654636502266, "timestamp": "2025-09-10 02:28:02.387564", "step": 1978, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.416182", "step": 1978, "epoch": 1 }, { "type": "loss", "content": 0.07820205390453339, "timestamp": "2025-09-10 02:28:02.417893", "step": 1979, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.445727", "step": 1979, "epoch": 1 }, { "type": "loss", "content": 0.005003311205655336, "timestamp": "2025-09-10 02:28:02.468829", "step": 1980, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:02.497727", "step": 1980, "epoch": 1 }, { "type": "loss", "content": 0.07674206793308258, "timestamp": "2025-09-10 02:28:02.499184", "step": 1981, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:02.527551", "step": 1981, "epoch": 1 }, { "type": "loss", "content": 0.051874928176403046, "timestamp": "2025-09-10 02:28:02.528771", "step": 1982, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.556847", "step": 1982, "epoch": 1 }, { "type": "loss", "content": 0.023504799231886864, "timestamp": "2025-09-10 02:28:02.558449", "step": 1983, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.586564", "step": 1983, "epoch": 1 }, { "type": "loss", "content": 0.02263377234339714, "timestamp": "2025-09-10 02:28:02.609618", "step": 1984, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.637946", "step": 1984, "epoch": 1 }, { "type": "loss", "content": 0.04072608798742294, "timestamp": "2025-09-10 02:28:02.639382", "step": 1985, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.667982", "step": 1985, "epoch": 1 }, { "type": "loss", "content": 0.04306507483124733, "timestamp": "2025-09-10 02:28:02.669472", "step": 1986, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.697784", "step": 1986, "epoch": 1 }, { "type": "loss", "content": 0.030287135392427444, "timestamp": "2025-09-10 02:28:02.699318", "step": 1987, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.727646", "step": 1987, "epoch": 1 }, { "type": "loss", "content": 0.02686193212866783, "timestamp": "2025-09-10 02:28:02.751879", "step": 1988, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.780490", "step": 1988, "epoch": 1 }, { "type": "loss", "content": 0.03202953562140465, "timestamp": "2025-09-10 02:28:02.782452", "step": 1989, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.810908", "step": 1989, "epoch": 1 }, { "type": "loss", "content": 0.03473440185189247, "timestamp": "2025-09-10 02:28:02.812657", "step": 1990, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:02.841133", "step": 1990, "epoch": 1 }, { "type": "loss", "content": 0.02686433121562004, "timestamp": "2025-09-10 02:28:02.842623", "step": 1991, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.870794", "step": 1991, "epoch": 1 }, { "type": "loss", "content": 0.01151194330304861, "timestamp": "2025-09-10 02:28:02.893884", "step": 1992, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.922163", "step": 1992, "epoch": 1 }, { "type": "loss", "content": 0.019326459616422653, "timestamp": "2025-09-10 02:28:02.923684", "step": 1993, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.951826", "step": 1993, "epoch": 1 }, { "type": "loss", "content": 0.027592310681939125, "timestamp": "2025-09-10 02:28:02.953218", "step": 1994, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:02.981941", "step": 1994, "epoch": 1 }, { "type": "loss", "content": 0.010238692164421082, "timestamp": "2025-09-10 02:28:02.983361", "step": 1995, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:03.011858", "step": 1995, "epoch": 1 }, { "type": "loss", "content": 0.018648786470294, "timestamp": "2025-09-10 02:28:03.034877", "step": 1996, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:03.063371", "step": 1996, "epoch": 1 }, { "type": "loss", "content": 0.03684840351343155, "timestamp": "2025-09-10 02:28:03.065004", "step": 1997, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:03.093270", "step": 1997, "epoch": 1 }, { "type": "loss", "content": 0.014682809822261333, "timestamp": "2025-09-10 02:28:03.094888", "step": 1998, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:03.123166", "step": 1998, "epoch": 1 }, { "type": "loss", "content": 0.04274337366223335, "timestamp": "2025-09-10 02:28:03.124590", "step": 1999, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:03.152658", "step": 1999, "epoch": 1 }, { "type": "loss", "content": 0.033814895898103714, "timestamp": "2025-09-10 02:28:03.175685", "step": 2000, "epoch": 1 }, { "type": "info", "content": "Checkpoint saved at step 2000", "timestamp": "2025-09-10 02:28:07.631862", "step": 2000, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:07.667761", "step": 2000, "epoch": 1 }, { "type": "loss", "content": 0.029487574473023415, "timestamp": "2025-09-10 02:28:07.669629", "step": 2001, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:07.699836", "step": 2001, "epoch": 1 }, { "type": "loss", "content": 0.01706775650382042, "timestamp": "2025-09-10 02:28:07.702279", "step": 2002, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:07.731638", "step": 2002, "epoch": 1 }, { "type": "loss", "content": 0.03300907462835312, "timestamp": "2025-09-10 02:28:07.733524", "step": 2003, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:07.762523", "step": 2003, "epoch": 1 }, { "type": "loss", "content": 0.06733154505491257, "timestamp": "2025-09-10 02:28:07.785798", "step": 2004, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:07.814688", "step": 2004, "epoch": 1 }, { "type": "loss", "content": 0.04184475913643837, "timestamp": "2025-09-10 02:28:07.816496", "step": 2005, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:07.845566", "step": 2005, "epoch": 1 }, { "type": "loss", "content": 0.04564382880926132, "timestamp": "2025-09-10 02:28:07.847163", "step": 2006, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:07.875977", "step": 2006, "epoch": 1 }, { "type": "loss", "content": 0.03412893787026405, "timestamp": "2025-09-10 02:28:07.877695", "step": 2007, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:07.906286", "step": 2007, "epoch": 1 }, { "type": "loss", "content": 0.022876108065247536, "timestamp": "2025-09-10 02:28:07.929760", "step": 2008, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:07.958196", "step": 2008, "epoch": 1 }, { "type": "loss", "content": 0.018554767593741417, "timestamp": "2025-09-10 02:28:07.959958", "step": 2009, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:07.988679", "step": 2009, "epoch": 1 }, { "type": "loss", "content": 0.023886600509285927, "timestamp": "2025-09-10 02:28:07.990056", "step": 2010, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:08.018577", "step": 2010, "epoch": 1 }, { "type": "loss", "content": 0.04853728041052818, "timestamp": "2025-09-10 02:28:08.020218", "step": 2011, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.048990", "step": 2011, "epoch": 1 }, { "type": "loss", "content": 0.019898338243365288, "timestamp": "2025-09-10 02:28:08.072154", "step": 2012, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:08.100911", "step": 2012, "epoch": 1 }, { "type": "loss", "content": 0.019382771104574203, "timestamp": "2025-09-10 02:28:08.102383", "step": 2013, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:08.130951", "step": 2013, "epoch": 1 }, { "type": "loss", "content": 0.06830476224422455, "timestamp": "2025-09-10 02:28:08.132408", "step": 2014, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.161088", "step": 2014, "epoch": 1 }, { "type": "loss", "content": 0.005032069515436888, "timestamp": "2025-09-10 02:28:08.162516", "step": 2015, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.190615", "step": 2015, "epoch": 1 }, { "type": "loss", "content": 0.012147782370448112, "timestamp": "2025-09-10 02:28:08.213740", "step": 2016, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.242440", "step": 2016, "epoch": 1 }, { "type": "loss", "content": 0.04396127536892891, "timestamp": "2025-09-10 02:28:08.244171", "step": 2017, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.272480", "step": 2017, "epoch": 1 }, { "type": "loss", "content": 0.020190289244055748, "timestamp": "2025-09-10 02:28:08.274367", "step": 2018, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.303270", "step": 2018, "epoch": 1 }, { "type": "loss", "content": 0.02030044235289097, "timestamp": "2025-09-10 02:28:08.304980", "step": 2019, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.333182", "step": 2019, "epoch": 1 }, { "type": "loss", "content": 0.05512828379869461, "timestamp": "2025-09-10 02:28:08.356347", "step": 2020, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.385146", "step": 2020, "epoch": 1 }, { "type": "loss", "content": 0.02449539303779602, "timestamp": "2025-09-10 02:28:08.386855", "step": 2021, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.415908", "step": 2021, "epoch": 1 }, { "type": "loss", "content": 0.035348691046237946, "timestamp": "2025-09-10 02:28:08.417545", "step": 2022, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.445813", "step": 2022, "epoch": 1 }, { "type": "loss", "content": 0.03864375129342079, "timestamp": "2025-09-10 02:28:08.447387", "step": 2023, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.475909", "step": 2023, "epoch": 1 }, { "type": "loss", "content": 0.09501069784164429, "timestamp": "2025-09-10 02:28:08.499069", "step": 2024, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.528299", "step": 2024, "epoch": 1 }, { "type": "loss", "content": 0.008634706027805805, "timestamp": "2025-09-10 02:28:08.530118", "step": 2025, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.558863", "step": 2025, "epoch": 1 }, { "type": "loss", "content": 0.004540830384939909, "timestamp": "2025-09-10 02:28:08.560675", "step": 2026, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.589257", "step": 2026, "epoch": 1 }, { "type": "loss", "content": 0.018655642867088318, "timestamp": "2025-09-10 02:28:08.591198", "step": 2027, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.620045", "step": 2027, "epoch": 1 }, { "type": "loss", "content": 0.04642891511321068, "timestamp": "2025-09-10 02:28:08.643270", "step": 2028, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.672064", "step": 2028, "epoch": 1 }, { "type": "loss", "content": 0.034627027809619904, "timestamp": "2025-09-10 02:28:08.673847", "step": 2029, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.702917", "step": 2029, "epoch": 1 }, { "type": "loss", "content": 0.016922222450375557, "timestamp": "2025-09-10 02:28:08.704895", "step": 2030, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.733540", "step": 2030, "epoch": 1 }, { "type": "loss", "content": 0.07418196648359299, "timestamp": "2025-09-10 02:28:08.735381", "step": 2031, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:08.764541", "step": 2031, "epoch": 1 }, { "type": "loss", "content": 0.01996367797255516, "timestamp": "2025-09-10 02:28:08.787835", "step": 2032, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.816941", "step": 2032, "epoch": 1 }, { "type": "loss", "content": 0.019962644204497337, "timestamp": "2025-09-10 02:28:08.818809", "step": 2033, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.847373", "step": 2033, "epoch": 1 }, { "type": "loss", "content": 0.03800464794039726, "timestamp": "2025-09-10 02:28:08.849170", "step": 2034, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.877637", "step": 2034, "epoch": 1 }, { "type": "loss", "content": 0.029811426997184753, "timestamp": "2025-09-10 02:28:08.879234", "step": 2035, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.907786", "step": 2035, "epoch": 1 }, { "type": "loss", "content": 0.026991935446858406, "timestamp": "2025-09-10 02:28:08.930858", "step": 2036, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:08.960109", "step": 2036, "epoch": 1 }, { "type": "loss", "content": 0.020711610093712807, "timestamp": "2025-09-10 02:28:08.961683", "step": 2037, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:08.990633", "step": 2037, "epoch": 1 }, { "type": "loss", "content": 0.015253338031470776, "timestamp": "2025-09-10 02:28:08.992313", "step": 2038, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.020723", "step": 2038, "epoch": 1 }, { "type": "loss", "content": 0.02093043550848961, "timestamp": "2025-09-10 02:28:09.022331", "step": 2039, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.051248", "step": 2039, "epoch": 1 }, { "type": "loss", "content": 0.009021738544106483, "timestamp": "2025-09-10 02:28:09.074144", "step": 2040, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:09.102971", "step": 2040, "epoch": 1 }, { "type": "loss", "content": 0.03510228544473648, "timestamp": "2025-09-10 02:28:09.104680", "step": 2041, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.133246", "step": 2041, "epoch": 1 }, { "type": "loss", "content": 0.03412038832902908, "timestamp": "2025-09-10 02:28:09.134886", "step": 2042, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.163480", "step": 2042, "epoch": 1 }, { "type": "loss", "content": 0.030756931751966476, "timestamp": "2025-09-10 02:28:09.165215", "step": 2043, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:09.194082", "step": 2043, "epoch": 1 }, { "type": "loss", "content": 0.00732546066865325, "timestamp": "2025-09-10 02:28:09.217456", "step": 2044, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.246439", "step": 2044, "epoch": 1 }, { "type": "loss", "content": 0.025469835847616196, "timestamp": "2025-09-10 02:28:09.248093", "step": 2045, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.276472", "step": 2045, "epoch": 1 }, { "type": "loss", "content": 0.03232096508145332, "timestamp": "2025-09-10 02:28:09.278311", "step": 2046, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.307133", "step": 2046, "epoch": 1 }, { "type": "loss", "content": 0.017231693491339684, "timestamp": "2025-09-10 02:28:09.309033", "step": 2047, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.337744", "step": 2047, "epoch": 1 }, { "type": "loss", "content": 0.020491067320108414, "timestamp": "2025-09-10 02:28:09.360873", "step": 2048, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.389569", "step": 2048, "epoch": 1 }, { "type": "loss", "content": 0.028713207691907883, "timestamp": "2025-09-10 02:28:09.391469", "step": 2049, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.420005", "step": 2049, "epoch": 1 }, { "type": "loss", "content": 0.050764452666044235, "timestamp": "2025-09-10 02:28:09.421886", "step": 2050, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.450643", "step": 2050, "epoch": 1 }, { "type": "loss", "content": 0.050802286714315414, "timestamp": "2025-09-10 02:28:09.452409", "step": 2051, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.481339", "step": 2051, "epoch": 1 }, { "type": "loss", "content": 0.010211057029664516, "timestamp": "2025-09-10 02:28:09.504347", "step": 2052, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:09.533527", "step": 2052, "epoch": 1 }, { "type": "loss", "content": 0.025202931836247444, "timestamp": "2025-09-10 02:28:09.535162", "step": 2053, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:09.564202", "step": 2053, "epoch": 1 }, { "type": "loss", "content": 0.043125562369823456, "timestamp": "2025-09-10 02:28:09.565687", "step": 2054, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:09.594363", "step": 2054, "epoch": 1 }, { "type": "loss", "content": 0.012664678506553173, "timestamp": "2025-09-10 02:28:09.596134", "step": 2055, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.625237", "step": 2055, "epoch": 1 }, { "type": "loss", "content": 0.033021651208400726, "timestamp": "2025-09-10 02:28:09.648294", "step": 2056, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:09.676801", "step": 2056, "epoch": 1 }, { "type": "loss", "content": 0.027453215792775154, "timestamp": "2025-09-10 02:28:09.678539", "step": 2057, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.707599", "step": 2057, "epoch": 1 }, { "type": "loss", "content": 0.025804953649640083, "timestamp": "2025-09-10 02:28:09.709304", "step": 2058, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:09.738240", "step": 2058, "epoch": 1 }, { "type": "loss", "content": 0.0035124425776302814, "timestamp": "2025-09-10 02:28:09.740208", "step": 2059, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.768655", "step": 2059, "epoch": 1 }, { "type": "loss", "content": 0.03783496841788292, "timestamp": "2025-09-10 02:28:09.792052", "step": 2060, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:09.821319", "step": 2060, "epoch": 1 }, { "type": "loss", "content": 0.008713445626199245, "timestamp": "2025-09-10 02:28:09.823300", "step": 2061, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.852668", "step": 2061, "epoch": 1 }, { "type": "loss", "content": 0.026288988068699837, "timestamp": "2025-09-10 02:28:09.854425", "step": 2062, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:09.883916", "step": 2062, "epoch": 1 }, { "type": "loss", "content": 0.022028058767318726, "timestamp": "2025-09-10 02:28:09.885695", "step": 2063, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:09.914300", "step": 2063, "epoch": 1 }, { "type": "loss", "content": 0.0448044128715992, "timestamp": "2025-09-10 02:28:09.937686", "step": 2064, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.966331", "step": 2064, "epoch": 1 }, { "type": "loss", "content": 0.012004708871245384, "timestamp": "2025-09-10 02:28:09.968078", "step": 2065, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:09.996951", "step": 2065, "epoch": 1 }, { "type": "loss", "content": 0.023711347952485085, "timestamp": "2025-09-10 02:28:09.998707", "step": 2066, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.027736", "step": 2066, "epoch": 1 }, { "type": "loss", "content": 0.025672154501080513, "timestamp": "2025-09-10 02:28:10.029472", "step": 2067, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.058478", "step": 2067, "epoch": 1 }, { "type": "loss", "content": 0.02726186253130436, "timestamp": "2025-09-10 02:28:10.081733", "step": 2068, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.110792", "step": 2068, "epoch": 1 }, { "type": "loss", "content": 0.031530968844890594, "timestamp": "2025-09-10 02:28:10.112476", "step": 2069, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:10.141926", "step": 2069, "epoch": 1 }, { "type": "loss", "content": 0.019077647477388382, "timestamp": "2025-09-10 02:28:10.143836", "step": 2070, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.172633", "step": 2070, "epoch": 1 }, { "type": "loss", "content": 0.019523994997143745, "timestamp": "2025-09-10 02:28:10.174591", "step": 2071, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.203537", "step": 2071, "epoch": 1 }, { "type": "loss", "content": 0.03711892291903496, "timestamp": "2025-09-10 02:28:10.226799", "step": 2072, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.255143", "step": 2072, "epoch": 1 }, { "type": "loss", "content": 0.0593751035630703, "timestamp": "2025-09-10 02:28:10.256515", "step": 2073, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.284797", "step": 2073, "epoch": 1 }, { "type": "loss", "content": 0.023463675752282143, "timestamp": "2025-09-10 02:28:10.286046", "step": 2074, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:10.314292", "step": 2074, "epoch": 1 }, { "type": "loss", "content": 0.01058922614902258, "timestamp": "2025-09-10 02:28:10.316086", "step": 2075, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.344791", "step": 2075, "epoch": 1 }, { "type": "loss", "content": 0.0200516264885664, "timestamp": "2025-09-10 02:28:10.367937", "step": 2076, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.396622", "step": 2076, "epoch": 1 }, { "type": "loss", "content": 0.01980191469192505, "timestamp": "2025-09-10 02:28:10.398247", "step": 2077, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.426733", "step": 2077, "epoch": 1 }, { "type": "loss", "content": 0.03624066710472107, "timestamp": "2025-09-10 02:28:10.428125", "step": 2078, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.456192", "step": 2078, "epoch": 1 }, { "type": "loss", "content": 0.0021096656564623117, "timestamp": "2025-09-10 02:28:10.457380", "step": 2079, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:10.486277", "step": 2079, "epoch": 1 }, { "type": "loss", "content": 0.06231212615966797, "timestamp": "2025-09-10 02:28:10.509552", "step": 2080, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.538523", "step": 2080, "epoch": 1 }, { "type": "loss", "content": 0.06326466053724289, "timestamp": "2025-09-10 02:28:10.539980", "step": 2081, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.568364", "step": 2081, "epoch": 1 }, { "type": "loss", "content": 0.02771034464240074, "timestamp": "2025-09-10 02:28:10.569889", "step": 2082, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.598182", "step": 2082, "epoch": 1 }, { "type": "loss", "content": 0.030195659026503563, "timestamp": "2025-09-10 02:28:10.599804", "step": 2083, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.628422", "step": 2083, "epoch": 1 }, { "type": "loss", "content": 0.013478175736963749, "timestamp": "2025-09-10 02:28:10.651349", "step": 2084, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:10.680300", "step": 2084, "epoch": 1 }, { "type": "loss", "content": 0.014244613237679005, "timestamp": "2025-09-10 02:28:10.681839", "step": 2085, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.710502", "step": 2085, "epoch": 1 }, { "type": "loss", "content": 0.007503572851419449, "timestamp": "2025-09-10 02:28:10.712098", "step": 2086, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.740194", "step": 2086, "epoch": 1 }, { "type": "loss", "content": 0.04743637517094612, "timestamp": "2025-09-10 02:28:10.741566", "step": 2087, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:10.769657", "step": 2087, "epoch": 1 }, { "type": "loss", "content": 0.017474576830863953, "timestamp": "2025-09-10 02:28:10.792657", "step": 2088, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:10.821101", "step": 2088, "epoch": 1 }, { "type": "loss", "content": 0.029541777446866035, "timestamp": "2025-09-10 02:28:10.822849", "step": 2089, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.851456", "step": 2089, "epoch": 1 }, { "type": "loss", "content": 0.02945699170231819, "timestamp": "2025-09-10 02:28:10.853323", "step": 2090, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.882235", "step": 2090, "epoch": 1 }, { "type": "loss", "content": 0.027768908068537712, "timestamp": "2025-09-10 02:28:10.883636", "step": 2091, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:10.911822", "step": 2091, "epoch": 1 }, { "type": "loss", "content": 0.016978060826659203, "timestamp": "2025-09-10 02:28:10.934886", "step": 2092, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:10.963708", "step": 2092, "epoch": 1 }, { "type": "loss", "content": 0.027785973623394966, "timestamp": "2025-09-10 02:28:10.965267", "step": 2093, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:10.993810", "step": 2093, "epoch": 1 }, { "type": "loss", "content": 0.03528384491801262, "timestamp": "2025-09-10 02:28:10.995299", "step": 2094, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.023795", "step": 2094, "epoch": 1 }, { "type": "loss", "content": 0.030607204884290695, "timestamp": "2025-09-10 02:28:11.025203", "step": 2095, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.053355", "step": 2095, "epoch": 1 }, { "type": "loss", "content": 0.04359974339604378, "timestamp": "2025-09-10 02:28:11.076310", "step": 2096, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:11.104890", "step": 2096, "epoch": 1 }, { "type": "loss", "content": 0.010347840376198292, "timestamp": "2025-09-10 02:28:11.106445", "step": 2097, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.135252", "step": 2097, "epoch": 1 }, { "type": "loss", "content": 0.048572640866041183, "timestamp": "2025-09-10 02:28:11.136631", "step": 2098, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.164797", "step": 2098, "epoch": 1 }, { "type": "loss", "content": 0.02608483098447323, "timestamp": "2025-09-10 02:28:11.166461", "step": 2099, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.195062", "step": 2099, "epoch": 1 }, { "type": "loss", "content": 0.006108488887548447, "timestamp": "2025-09-10 02:28:11.217957", "step": 2100, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.246612", "step": 2100, "epoch": 1 }, { "type": "loss", "content": 0.012660800479352474, "timestamp": "2025-09-10 02:28:11.248218", "step": 2101, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.276390", "step": 2101, "epoch": 1 }, { "type": "loss", "content": 0.012901701964437962, "timestamp": "2025-09-10 02:28:11.277977", "step": 2102, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:11.306548", "step": 2102, "epoch": 1 }, { "type": "loss", "content": 0.05675072222948074, "timestamp": "2025-09-10 02:28:11.308198", "step": 2103, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:11.336723", "step": 2103, "epoch": 1 }, { "type": "loss", "content": 0.01660478673875332, "timestamp": "2025-09-10 02:28:11.359924", "step": 2104, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.388784", "step": 2104, "epoch": 1 }, { "type": "loss", "content": 0.025345126166939735, "timestamp": "2025-09-10 02:28:11.390690", "step": 2105, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.419266", "step": 2105, "epoch": 1 }, { "type": "loss", "content": 0.019086772575974464, "timestamp": "2025-09-10 02:28:11.420836", "step": 2106, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.449105", "step": 2106, "epoch": 1 }, { "type": "loss", "content": 0.059554796665906906, "timestamp": "2025-09-10 02:28:11.450308", "step": 2107, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:11.478819", "step": 2107, "epoch": 1 }, { "type": "loss", "content": 0.009006127715110779, "timestamp": "2025-09-10 02:28:11.502164", "step": 2108, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.530784", "step": 2108, "epoch": 1 }, { "type": "loss", "content": 0.04445163160562515, "timestamp": "2025-09-10 02:28:11.532035", "step": 2109, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.560194", "step": 2109, "epoch": 1 }, { "type": "loss", "content": 0.03971460089087486, "timestamp": "2025-09-10 02:28:11.561679", "step": 2110, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.590148", "step": 2110, "epoch": 1 }, { "type": "loss", "content": 0.0253768227994442, "timestamp": "2025-09-10 02:28:11.591758", "step": 2111, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.620088", "step": 2111, "epoch": 1 }, { "type": "loss", "content": 0.040283214300870895, "timestamp": "2025-09-10 02:28:11.643139", "step": 2112, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:11.671564", "step": 2112, "epoch": 1 }, { "type": "loss", "content": 0.010990321636199951, "timestamp": "2025-09-10 02:28:11.673005", "step": 2113, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.701603", "step": 2113, "epoch": 1 }, { "type": "loss", "content": 0.013297955505549908, "timestamp": "2025-09-10 02:28:11.703200", "step": 2114, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.731988", "step": 2114, "epoch": 1 }, { "type": "loss", "content": 0.023308461531996727, "timestamp": "2025-09-10 02:28:11.733659", "step": 2115, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.762253", "step": 2115, "epoch": 1 }, { "type": "loss", "content": 0.06439714133739471, "timestamp": "2025-09-10 02:28:11.785204", "step": 2116, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.813957", "step": 2116, "epoch": 1 }, { "type": "loss", "content": 0.010951301082968712, "timestamp": "2025-09-10 02:28:11.815631", "step": 2117, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:11.844010", "step": 2117, "epoch": 1 }, { "type": "loss", "content": 0.008319989778101444, "timestamp": "2025-09-10 02:28:11.845547", "step": 2118, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.873815", "step": 2118, "epoch": 1 }, { "type": "loss", "content": 0.03934324532747269, "timestamp": "2025-09-10 02:28:11.875466", "step": 2119, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.903971", "step": 2119, "epoch": 1 }, { "type": "loss", "content": 0.04784102365374565, "timestamp": "2025-09-10 02:28:11.927279", "step": 2120, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:11.955591", "step": 2120, "epoch": 1 }, { "type": "loss", "content": 0.040347445756196976, "timestamp": "2025-09-10 02:28:11.957131", "step": 2121, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:11.985694", "step": 2121, "epoch": 1 }, { "type": "loss", "content": 0.008014153689146042, "timestamp": "2025-09-10 02:28:11.987072", "step": 2122, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:12.015545", "step": 2122, "epoch": 1 }, { "type": "loss", "content": 0.052506666630506516, "timestamp": "2025-09-10 02:28:12.016943", "step": 2123, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:12.044970", "step": 2123, "epoch": 1 }, { "type": "loss", "content": 0.011240340769290924, "timestamp": "2025-09-10 02:28:12.068151", "step": 2124, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:12.096596", "step": 2124, "epoch": 1 }, { "type": "loss", "content": 0.010675467550754547, "timestamp": "2025-09-10 02:28:12.098235", "step": 2125, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:12.126842", "step": 2125, "epoch": 1 }, { "type": "loss", "content": 0.006910696625709534, "timestamp": "2025-09-10 02:28:12.128405", "step": 2126, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:12.157267", "step": 2126, "epoch": 1 }, { "type": "loss", "content": 0.00909285806119442, "timestamp": "2025-09-10 02:28:12.158680", "step": 2127, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:12.187211", "step": 2127, "epoch": 1 }, { "type": "loss", "content": 0.01328167226165533, "timestamp": "2025-09-10 02:28:12.210113", "step": 2128, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:28:14.066829", "step": 2128, "epoch": 1 }, { "type": "pplx", "content": 2282446.507026665, "timestamp": "2025-09-10 02:28:14.068360", "step": 2128, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:14.095523", "step": 2128, "epoch": 1 }, { "type": "loss", "content": 0.06975377351045609, "timestamp": "2025-09-10 02:28:14.097186", "step": 2129, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.125963", "step": 2129, "epoch": 1 }, { "type": "loss", "content": 0.010947045870125294, "timestamp": "2025-09-10 02:28:14.127381", "step": 2130, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.156648", "step": 2130, "epoch": 1 }, { "type": "loss", "content": 0.006454108748584986, "timestamp": "2025-09-10 02:28:14.158172", "step": 2131, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:14.186675", "step": 2131, "epoch": 1 }, { "type": "loss", "content": 0.03406180068850517, "timestamp": "2025-09-10 02:28:14.209765", "step": 2132, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.238570", "step": 2132, "epoch": 1 }, { "type": "loss", "content": 0.03409416601061821, "timestamp": "2025-09-10 02:28:14.239945", "step": 2133, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:14.268024", "step": 2133, "epoch": 1 }, { "type": "loss", "content": 0.014770114794373512, "timestamp": "2025-09-10 02:28:14.269418", "step": 2134, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.298132", "step": 2134, "epoch": 1 }, { "type": "loss", "content": 0.047664035111665726, "timestamp": "2025-09-10 02:28:14.299551", "step": 2135, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:14.327775", "step": 2135, "epoch": 1 }, { "type": "loss", "content": 0.015450273640453815, "timestamp": "2025-09-10 02:28:14.350895", "step": 2136, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.380306", "step": 2136, "epoch": 1 }, { "type": "loss", "content": 0.00842917338013649, "timestamp": "2025-09-10 02:28:14.382027", "step": 2137, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.410073", "step": 2137, "epoch": 1 }, { "type": "loss", "content": 0.023178046569228172, "timestamp": "2025-09-10 02:28:14.412117", "step": 2138, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.440609", "step": 2138, "epoch": 1 }, { "type": "loss", "content": 0.025276973843574524, "timestamp": "2025-09-10 02:28:14.442238", "step": 2139, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.470989", "step": 2139, "epoch": 1 }, { "type": "loss", "content": 0.0542147234082222, "timestamp": "2025-09-10 02:28:14.494224", "step": 2140, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.522791", "step": 2140, "epoch": 1 }, { "type": "loss", "content": 0.04216999188065529, "timestamp": "2025-09-10 02:28:14.524452", "step": 2141, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.552879", "step": 2141, "epoch": 1 }, { "type": "loss", "content": 0.01685541495680809, "timestamp": "2025-09-10 02:28:14.554592", "step": 2142, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:14.583315", "step": 2142, "epoch": 1 }, { "type": "loss", "content": 0.011981974355876446, "timestamp": "2025-09-10 02:28:14.585082", "step": 2143, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.613937", "step": 2143, "epoch": 1 }, { "type": "loss", "content": 0.012850151397287846, "timestamp": "2025-09-10 02:28:14.636827", "step": 2144, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.665470", "step": 2144, "epoch": 1 }, { "type": "loss", "content": 0.01151515543460846, "timestamp": "2025-09-10 02:28:14.666970", "step": 2145, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.695149", "step": 2145, "epoch": 1 }, { "type": "loss", "content": 0.022794952616095543, "timestamp": "2025-09-10 02:28:14.696729", "step": 2146, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.724795", "step": 2146, "epoch": 1 }, { "type": "loss", "content": 0.03026963397860527, "timestamp": "2025-09-10 02:28:14.726416", "step": 2147, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.754918", "step": 2147, "epoch": 1 }, { "type": "loss", "content": 0.02113904245197773, "timestamp": "2025-09-10 02:28:14.777810", "step": 2148, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.806391", "step": 2148, "epoch": 1 }, { "type": "loss", "content": 0.03410894051194191, "timestamp": "2025-09-10 02:28:14.808025", "step": 2149, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.836256", "step": 2149, "epoch": 1 }, { "type": "loss", "content": 0.027280699461698532, "timestamp": "2025-09-10 02:28:14.837626", "step": 2150, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:14.865692", "step": 2150, "epoch": 1 }, { "type": "loss", "content": 0.038459666073322296, "timestamp": "2025-09-10 02:28:14.867182", "step": 2151, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.895659", "step": 2151, "epoch": 1 }, { "type": "loss", "content": 0.02917073294520378, "timestamp": "2025-09-10 02:28:14.918680", "step": 2152, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.947785", "step": 2152, "epoch": 1 }, { "type": "loss", "content": 0.011971733532845974, "timestamp": "2025-09-10 02:28:14.949302", "step": 2153, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:14.977470", "step": 2153, "epoch": 1 }, { "type": "loss", "content": 0.04599002003669739, "timestamp": "2025-09-10 02:28:14.979151", "step": 2154, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.007861", "step": 2154, "epoch": 1 }, { "type": "loss", "content": 0.07949955016374588, "timestamp": "2025-09-10 02:28:15.009499", "step": 2155, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.037708", "step": 2155, "epoch": 1 }, { "type": "loss", "content": 0.052912481129169464, "timestamp": "2025-09-10 02:28:15.060939", "step": 2156, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.089602", "step": 2156, "epoch": 1 }, { "type": "loss", "content": 0.03489892557263374, "timestamp": "2025-09-10 02:28:15.091298", "step": 2157, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.120021", "step": 2157, "epoch": 1 }, { "type": "loss", "content": 0.02402343414723873, "timestamp": "2025-09-10 02:28:15.121598", "step": 2158, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.149930", "step": 2158, "epoch": 1 }, { "type": "loss", "content": 0.038121890276670456, "timestamp": "2025-09-10 02:28:15.151364", "step": 2159, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.180789", "step": 2159, "epoch": 1 }, { "type": "loss", "content": 0.007009260356426239, "timestamp": "2025-09-10 02:28:15.203994", "step": 2160, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.232957", "step": 2160, "epoch": 1 }, { "type": "loss", "content": 0.023040596395730972, "timestamp": "2025-09-10 02:28:15.234612", "step": 2161, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.262977", "step": 2161, "epoch": 1 }, { "type": "loss", "content": 0.025174299255013466, "timestamp": "2025-09-10 02:28:15.264440", "step": 2162, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.293054", "step": 2162, "epoch": 1 }, { "type": "loss", "content": 0.01304810680449009, "timestamp": "2025-09-10 02:28:15.294485", "step": 2163, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.322604", "step": 2163, "epoch": 1 }, { "type": "loss", "content": 0.002673085778951645, "timestamp": "2025-09-10 02:28:15.345605", "step": 2164, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.374156", "step": 2164, "epoch": 1 }, { "type": "loss", "content": 0.026758363470435143, "timestamp": "2025-09-10 02:28:15.375557", "step": 2165, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.403925", "step": 2165, "epoch": 1 }, { "type": "loss", "content": 0.04663803428411484, "timestamp": "2025-09-10 02:28:15.405442", "step": 2166, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.433661", "step": 2166, "epoch": 1 }, { "type": "loss", "content": 0.033424012362957, "timestamp": "2025-09-10 02:28:15.435333", "step": 2167, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.463712", "step": 2167, "epoch": 1 }, { "type": "loss", "content": 0.0035018210764974356, "timestamp": "2025-09-10 02:28:15.486713", "step": 2168, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.515068", "step": 2168, "epoch": 1 }, { "type": "loss", "content": 0.0530601367354393, "timestamp": "2025-09-10 02:28:15.516557", "step": 2169, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.544791", "step": 2169, "epoch": 1 }, { "type": "loss", "content": 0.01699506677687168, "timestamp": "2025-09-10 02:28:15.546428", "step": 2170, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.574960", "step": 2170, "epoch": 1 }, { "type": "loss", "content": 0.009327538311481476, "timestamp": "2025-09-10 02:28:15.576832", "step": 2171, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.605615", "step": 2171, "epoch": 1 }, { "type": "loss", "content": 0.029642315581440926, "timestamp": "2025-09-10 02:28:15.628876", "step": 2172, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.658127", "step": 2172, "epoch": 1 }, { "type": "loss", "content": 0.012382798828184605, "timestamp": "2025-09-10 02:28:15.659656", "step": 2173, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:15.687953", "step": 2173, "epoch": 1 }, { "type": "loss", "content": 0.01727687008678913, "timestamp": "2025-09-10 02:28:15.689390", "step": 2174, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.718095", "step": 2174, "epoch": 1 }, { "type": "loss", "content": 0.010606663301587105, "timestamp": "2025-09-10 02:28:15.719770", "step": 2175, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.747803", "step": 2175, "epoch": 1 }, { "type": "loss", "content": 0.023922478780150414, "timestamp": "2025-09-10 02:28:15.770826", "step": 2176, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:15.800258", "step": 2176, "epoch": 1 }, { "type": "loss", "content": 0.024894535541534424, "timestamp": "2025-09-10 02:28:15.801660", "step": 2177, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.830065", "step": 2177, "epoch": 1 }, { "type": "loss", "content": 0.014281758107244968, "timestamp": "2025-09-10 02:28:15.831532", "step": 2178, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.860395", "step": 2178, "epoch": 1 }, { "type": "loss", "content": 0.04998927190899849, "timestamp": "2025-09-10 02:28:15.861963", "step": 2179, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.890152", "step": 2179, "epoch": 1 }, { "type": "loss", "content": 0.008675232529640198, "timestamp": "2025-09-10 02:28:15.913368", "step": 2180, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.941803", "step": 2180, "epoch": 1 }, { "type": "loss", "content": 0.0068986122496426105, "timestamp": "2025-09-10 02:28:15.943339", "step": 2181, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:15.972064", "step": 2181, "epoch": 1 }, { "type": "loss", "content": 0.11237634718418121, "timestamp": "2025-09-10 02:28:15.973595", "step": 2182, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:16.001845", "step": 2182, "epoch": 1 }, { "type": "loss", "content": 0.03960355743765831, "timestamp": "2025-09-10 02:28:16.003188", "step": 2183, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.031808", "step": 2183, "epoch": 1 }, { "type": "loss", "content": 0.01569380983710289, "timestamp": "2025-09-10 02:28:16.054885", "step": 2184, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.083415", "step": 2184, "epoch": 1 }, { "type": "loss", "content": 0.038847409188747406, "timestamp": "2025-09-10 02:28:16.084946", "step": 2185, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.113582", "step": 2185, "epoch": 1 }, { "type": "loss", "content": 0.037907857447862625, "timestamp": "2025-09-10 02:28:16.115104", "step": 2186, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:16.144296", "step": 2186, "epoch": 1 }, { "type": "loss", "content": 0.018548624590039253, "timestamp": "2025-09-10 02:28:16.146005", "step": 2187, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.174502", "step": 2187, "epoch": 1 }, { "type": "loss", "content": 0.04279320314526558, "timestamp": "2025-09-10 02:28:16.197629", "step": 2188, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.226160", "step": 2188, "epoch": 1 }, { "type": "loss", "content": 0.022673947736620903, "timestamp": "2025-09-10 02:28:16.227867", "step": 2189, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.256298", "step": 2189, "epoch": 1 }, { "type": "loss", "content": 0.010001570917665958, "timestamp": "2025-09-10 02:28:16.257674", "step": 2190, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.286825", "step": 2190, "epoch": 1 }, { "type": "loss", "content": 0.008761915378272533, "timestamp": "2025-09-10 02:28:16.288342", "step": 2191, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.317155", "step": 2191, "epoch": 1 }, { "type": "loss", "content": 0.027428340166807175, "timestamp": "2025-09-10 02:28:16.340253", "step": 2192, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.369200", "step": 2192, "epoch": 1 }, { "type": "loss", "content": 0.011549754999577999, "timestamp": "2025-09-10 02:28:16.370661", "step": 2193, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.399156", "step": 2193, "epoch": 1 }, { "type": "loss", "content": 0.022620776668190956, "timestamp": "2025-09-10 02:28:16.400643", "step": 2194, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.429394", "step": 2194, "epoch": 1 }, { "type": "loss", "content": 0.052805330604314804, "timestamp": "2025-09-10 02:28:16.430799", "step": 2195, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:16.459301", "step": 2195, "epoch": 1 }, { "type": "loss", "content": 0.03661612421274185, "timestamp": "2025-09-10 02:28:16.482315", "step": 2196, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:16.510440", "step": 2196, "epoch": 1 }, { "type": "loss", "content": 0.025274012237787247, "timestamp": "2025-09-10 02:28:16.511841", "step": 2197, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:16.540215", "step": 2197, "epoch": 1 }, { "type": "loss", "content": 0.017450639978051186, "timestamp": "2025-09-10 02:28:16.541604", "step": 2198, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.569799", "step": 2198, "epoch": 1 }, { "type": "loss", "content": 0.012563006021082401, "timestamp": "2025-09-10 02:28:16.571289", "step": 2199, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.599678", "step": 2199, "epoch": 1 }, { "type": "loss", "content": 0.05560094490647316, "timestamp": "2025-09-10 02:28:16.622968", "step": 2200, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.651961", "step": 2200, "epoch": 1 }, { "type": "loss", "content": 0.04848475009202957, "timestamp": "2025-09-10 02:28:16.653602", "step": 2201, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.681800", "step": 2201, "epoch": 1 }, { "type": "loss", "content": 0.050533466041088104, "timestamp": "2025-09-10 02:28:16.683409", "step": 2202, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.711967", "step": 2202, "epoch": 1 }, { "type": "loss", "content": 0.030187392607331276, "timestamp": "2025-09-10 02:28:16.713405", "step": 2203, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:16.741920", "step": 2203, "epoch": 1 }, { "type": "loss", "content": 0.02421838603913784, "timestamp": "2025-09-10 02:28:16.765120", "step": 2204, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.793666", "step": 2204, "epoch": 1 }, { "type": "loss", "content": 0.05683031678199768, "timestamp": "2025-09-10 02:28:16.795290", "step": 2205, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.823954", "step": 2205, "epoch": 1 }, { "type": "loss", "content": 0.06156497076153755, "timestamp": "2025-09-10 02:28:16.825409", "step": 2206, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.853914", "step": 2206, "epoch": 1 }, { "type": "loss", "content": 0.007874163798987865, "timestamp": "2025-09-10 02:28:16.855426", "step": 2207, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:16.883797", "step": 2207, "epoch": 1 }, { "type": "loss", "content": 0.029922401532530785, "timestamp": "2025-09-10 02:28:16.906844", "step": 2208, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:16.935189", "step": 2208, "epoch": 1 }, { "type": "loss", "content": 0.06222435459494591, "timestamp": "2025-09-10 02:28:16.937827", "step": 2209, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.969080", "step": 2209, "epoch": 1 }, { "type": "loss", "content": 0.01762712560594082, "timestamp": "2025-09-10 02:28:16.970465", "step": 2210, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:16.999106", "step": 2210, "epoch": 1 }, { "type": "loss", "content": 0.015461350791156292, "timestamp": "2025-09-10 02:28:17.000537", "step": 2211, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.029163", "step": 2211, "epoch": 1 }, { "type": "loss", "content": 0.01912308856844902, "timestamp": "2025-09-10 02:28:17.052150", "step": 2212, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.080227", "step": 2212, "epoch": 1 }, { "type": "loss", "content": 0.044046707451343536, "timestamp": "2025-09-10 02:28:17.081967", "step": 2213, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.110555", "step": 2213, "epoch": 1 }, { "type": "loss", "content": 0.01707787811756134, "timestamp": "2025-09-10 02:28:17.112246", "step": 2214, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.141271", "step": 2214, "epoch": 1 }, { "type": "loss", "content": 0.013046731241047382, "timestamp": "2025-09-10 02:28:17.143008", "step": 2215, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.171636", "step": 2215, "epoch": 1 }, { "type": "loss", "content": 0.06424341350793839, "timestamp": "2025-09-10 02:28:17.194825", "step": 2216, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:17.223699", "step": 2216, "epoch": 1 }, { "type": "loss", "content": 0.014600231312215328, "timestamp": "2025-09-10 02:28:17.225474", "step": 2217, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.254045", "step": 2217, "epoch": 1 }, { "type": "loss", "content": 0.023384276777505875, "timestamp": "2025-09-10 02:28:17.255831", "step": 2218, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:17.284159", "step": 2218, "epoch": 1 }, { "type": "loss", "content": 0.021726815029978752, "timestamp": "2025-09-10 02:28:17.286007", "step": 2219, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:17.314656", "step": 2219, "epoch": 1 }, { "type": "loss", "content": 0.007144995033740997, "timestamp": "2025-09-10 02:28:17.337890", "step": 2220, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.366917", "step": 2220, "epoch": 1 }, { "type": "loss", "content": 0.026717673987150192, "timestamp": "2025-09-10 02:28:17.368819", "step": 2221, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.397796", "step": 2221, "epoch": 1 }, { "type": "loss", "content": 0.009127212688326836, "timestamp": "2025-09-10 02:28:17.399869", "step": 2222, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.429062", "step": 2222, "epoch": 1 }, { "type": "loss", "content": 0.011890118941664696, "timestamp": "2025-09-10 02:28:17.430938", "step": 2223, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.460264", "step": 2223, "epoch": 1 }, { "type": "loss", "content": 0.016838667914271355, "timestamp": "2025-09-10 02:28:17.483544", "step": 2224, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.514288", "step": 2224, "epoch": 1 }, { "type": "loss", "content": 0.0234785545617342, "timestamp": "2025-09-10 02:28:17.515975", "step": 2225, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.544427", "step": 2225, "epoch": 1 }, { "type": "loss", "content": 0.028258562088012695, "timestamp": "2025-09-10 02:28:17.546008", "step": 2226, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.574318", "step": 2226, "epoch": 1 }, { "type": "loss", "content": 0.03367723897099495, "timestamp": "2025-09-10 02:28:17.575865", "step": 2227, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.604297", "step": 2227, "epoch": 1 }, { "type": "loss", "content": 0.03372273966670036, "timestamp": "2025-09-10 02:28:17.627345", "step": 2228, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.657042", "step": 2228, "epoch": 1 }, { "type": "loss", "content": 0.026078801602125168, "timestamp": "2025-09-10 02:28:17.658893", "step": 2229, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.688098", "step": 2229, "epoch": 1 }, { "type": "loss", "content": 0.008887152187526226, "timestamp": "2025-09-10 02:28:17.690114", "step": 2230, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.718801", "step": 2230, "epoch": 1 }, { "type": "loss", "content": 0.028703680261969566, "timestamp": "2025-09-10 02:28:17.720487", "step": 2231, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.749353", "step": 2231, "epoch": 1 }, { "type": "loss", "content": 0.007377589587122202, "timestamp": "2025-09-10 02:28:17.772773", "step": 2232, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.801632", "step": 2232, "epoch": 1 }, { "type": "loss", "content": 0.049235738813877106, "timestamp": "2025-09-10 02:28:17.803287", "step": 2233, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.832031", "step": 2233, "epoch": 1 }, { "type": "loss", "content": 0.02330530807375908, "timestamp": "2025-09-10 02:28:17.833874", "step": 2234, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.862187", "step": 2234, "epoch": 1 }, { "type": "loss", "content": 0.025780126452445984, "timestamp": "2025-09-10 02:28:17.863777", "step": 2235, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.892202", "step": 2235, "epoch": 1 }, { "type": "loss", "content": 0.03770305588841438, "timestamp": "2025-09-10 02:28:17.915590", "step": 2236, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.944436", "step": 2236, "epoch": 1 }, { "type": "loss", "content": 0.04107164219021797, "timestamp": "2025-09-10 02:28:17.946249", "step": 2237, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:17.975112", "step": 2237, "epoch": 1 }, { "type": "loss", "content": 0.03411278501152992, "timestamp": "2025-09-10 02:28:17.976925", "step": 2238, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.005288", "step": 2238, "epoch": 1 }, { "type": "loss", "content": 0.05005975812673569, "timestamp": "2025-09-10 02:28:18.007055", "step": 2239, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.035926", "step": 2239, "epoch": 1 }, { "type": "loss", "content": 0.00835686270147562, "timestamp": "2025-09-10 02:28:18.059239", "step": 2240, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.088976", "step": 2240, "epoch": 1 }, { "type": "loss", "content": 0.02591152861714363, "timestamp": "2025-09-10 02:28:18.090565", "step": 2241, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.119081", "step": 2241, "epoch": 1 }, { "type": "loss", "content": 0.028668338432908058, "timestamp": "2025-09-10 02:28:18.120645", "step": 2242, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.148982", "step": 2242, "epoch": 1 }, { "type": "loss", "content": 0.03449665382504463, "timestamp": "2025-09-10 02:28:18.150744", "step": 2243, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.179697", "step": 2243, "epoch": 1 }, { "type": "loss", "content": 0.03282909840345383, "timestamp": "2025-09-10 02:28:18.204131", "step": 2244, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.233233", "step": 2244, "epoch": 1 }, { "type": "loss", "content": 0.027261529117822647, "timestamp": "2025-09-10 02:28:18.234873", "step": 2245, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.263332", "step": 2245, "epoch": 1 }, { "type": "loss", "content": 0.05301208049058914, "timestamp": "2025-09-10 02:28:18.265351", "step": 2246, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.294271", "step": 2246, "epoch": 1 }, { "type": "loss", "content": 0.0440392792224884, "timestamp": "2025-09-10 02:28:18.296080", "step": 2247, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.324712", "step": 2247, "epoch": 1 }, { "type": "loss", "content": 0.018889544531702995, "timestamp": "2025-09-10 02:28:18.348036", "step": 2248, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.376948", "step": 2248, "epoch": 1 }, { "type": "loss", "content": 0.020905017852783203, "timestamp": "2025-09-10 02:28:18.378676", "step": 2249, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:18.407163", "step": 2249, "epoch": 1 }, { "type": "loss", "content": 0.005557719152420759, "timestamp": "2025-09-10 02:28:18.409099", "step": 2250, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.437819", "step": 2250, "epoch": 1 }, { "type": "loss", "content": 0.02418561466038227, "timestamp": "2025-09-10 02:28:18.439585", "step": 2251, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:18.468127", "step": 2251, "epoch": 1 }, { "type": "loss", "content": 0.008221170864999294, "timestamp": "2025-09-10 02:28:18.491396", "step": 2252, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.520676", "step": 2252, "epoch": 1 }, { "type": "loss", "content": 0.057192351669073105, "timestamp": "2025-09-10 02:28:18.522478", "step": 2253, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.551660", "step": 2253, "epoch": 1 }, { "type": "loss", "content": 0.04847079887986183, "timestamp": "2025-09-10 02:28:18.553501", "step": 2254, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.582710", "step": 2254, "epoch": 1 }, { "type": "loss", "content": 0.008429114706814289, "timestamp": "2025-09-10 02:28:18.584885", "step": 2255, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:18.614160", "step": 2255, "epoch": 1 }, { "type": "loss", "content": 0.050681017339229584, "timestamp": "2025-09-10 02:28:18.637548", "step": 2256, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:18.666711", "step": 2256, "epoch": 1 }, { "type": "loss", "content": 0.010908703319728374, "timestamp": "2025-09-10 02:28:18.668414", "step": 2257, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.697302", "step": 2257, "epoch": 1 }, { "type": "loss", "content": 0.06985725462436676, "timestamp": "2025-09-10 02:28:18.699233", "step": 2258, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:18.728114", "step": 2258, "epoch": 1 }, { "type": "loss", "content": 0.02067830041050911, "timestamp": "2025-09-10 02:28:18.730038", "step": 2259, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:18.758412", "step": 2259, "epoch": 1 }, { "type": "loss", "content": 0.012844773940742016, "timestamp": "2025-09-10 02:28:18.781691", "step": 2260, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.810600", "step": 2260, "epoch": 1 }, { "type": "loss", "content": 0.03253468871116638, "timestamp": "2025-09-10 02:28:18.812226", "step": 2261, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.844068", "step": 2261, "epoch": 1 }, { "type": "loss", "content": 0.03450094908475876, "timestamp": "2025-09-10 02:28:18.845693", "step": 2262, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.874417", "step": 2262, "epoch": 1 }, { "type": "loss", "content": 0.022504892200231552, "timestamp": "2025-09-10 02:28:18.875815", "step": 2263, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:18.904083", "step": 2263, "epoch": 1 }, { "type": "loss", "content": 0.0401475764811039, "timestamp": "2025-09-10 02:28:18.927227", "step": 2264, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:18.956319", "step": 2264, "epoch": 1 }, { "type": "loss", "content": 0.027728352695703506, "timestamp": "2025-09-10 02:28:18.958162", "step": 2265, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:18.986592", "step": 2265, "epoch": 1 }, { "type": "loss", "content": 0.07560262829065323, "timestamp": "2025-09-10 02:28:18.988460", "step": 2266, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:19.017184", "step": 2266, "epoch": 1 }, { "type": "loss", "content": 0.040750812739133835, "timestamp": "2025-09-10 02:28:19.018987", "step": 2267, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.047728", "step": 2267, "epoch": 1 }, { "type": "loss", "content": 0.02107156440615654, "timestamp": "2025-09-10 02:28:19.071111", "step": 2268, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.099408", "step": 2268, "epoch": 1 }, { "type": "loss", "content": 0.03714641183614731, "timestamp": "2025-09-10 02:28:19.101161", "step": 2269, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.129974", "step": 2269, "epoch": 1 }, { "type": "loss", "content": 0.056046221405267715, "timestamp": "2025-09-10 02:28:19.131866", "step": 2270, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.160272", "step": 2270, "epoch": 1 }, { "type": "loss", "content": 0.013576915487647057, "timestamp": "2025-09-10 02:28:19.161962", "step": 2271, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.190806", "step": 2271, "epoch": 1 }, { "type": "loss", "content": 0.05251288786530495, "timestamp": "2025-09-10 02:28:19.214233", "step": 2272, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.243421", "step": 2272, "epoch": 1 }, { "type": "loss", "content": 0.03815519064664841, "timestamp": "2025-09-10 02:28:19.245174", "step": 2273, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:19.273712", "step": 2273, "epoch": 1 }, { "type": "loss", "content": 0.010572902858257294, "timestamp": "2025-09-10 02:28:19.275260", "step": 2274, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:19.304198", "step": 2274, "epoch": 1 }, { "type": "loss", "content": 0.021908242255449295, "timestamp": "2025-09-10 02:28:19.305790", "step": 2275, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.334197", "step": 2275, "epoch": 1 }, { "type": "loss", "content": 0.015945082530379295, "timestamp": "2025-09-10 02:28:19.357196", "step": 2276, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.385567", "step": 2276, "epoch": 1 }, { "type": "loss", "content": 0.03457672521471977, "timestamp": "2025-09-10 02:28:19.387171", "step": 2277, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.415331", "step": 2277, "epoch": 1 }, { "type": "loss", "content": 0.0339481346309185, "timestamp": "2025-09-10 02:28:19.416723", "step": 2278, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.445017", "step": 2278, "epoch": 1 }, { "type": "loss", "content": 0.0076408893801271915, "timestamp": "2025-09-10 02:28:19.446861", "step": 2279, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:19.475794", "step": 2279, "epoch": 1 }, { "type": "loss", "content": 0.033159803599119186, "timestamp": "2025-09-10 02:28:19.498977", "step": 2280, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:28:21.429802", "step": 2280, "epoch": 1 }, { "type": "pplx", "content": 2403803.710891873, "timestamp": "2025-09-10 02:28:21.431776", "step": 2280, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.459724", "step": 2280, "epoch": 1 }, { "type": "loss", "content": 0.03758122771978378, "timestamp": "2025-09-10 02:28:21.461570", "step": 2281, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.497252", "step": 2281, "epoch": 1 }, { "type": "loss", "content": 0.023119444027543068, "timestamp": "2025-09-10 02:28:21.499893", "step": 2282, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.537046", "step": 2282, "epoch": 1 }, { "type": "loss", "content": 0.013347284868359566, "timestamp": "2025-09-10 02:28:21.538981", "step": 2283, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.567469", "step": 2283, "epoch": 1 }, { "type": "loss", "content": 0.04204104468226433, "timestamp": "2025-09-10 02:28:21.591244", "step": 2284, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.625555", "step": 2284, "epoch": 1 }, { "type": "loss", "content": 0.013740544207394123, "timestamp": "2025-09-10 02:28:21.627380", "step": 2285, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.657659", "step": 2285, "epoch": 1 }, { "type": "loss", "content": 0.021617084741592407, "timestamp": "2025-09-10 02:28:21.659424", "step": 2286, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.689384", "step": 2286, "epoch": 1 }, { "type": "loss", "content": 0.03826426342129707, "timestamp": "2025-09-10 02:28:21.691136", "step": 2287, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.719949", "step": 2287, "epoch": 1 }, { "type": "loss", "content": 0.01925254799425602, "timestamp": "2025-09-10 02:28:21.745498", "step": 2288, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:21.780114", "step": 2288, "epoch": 1 }, { "type": "loss", "content": 0.025817211717367172, "timestamp": "2025-09-10 02:28:21.788792", "step": 2289, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:21.824458", "step": 2289, "epoch": 1 }, { "type": "loss", "content": 0.0554530955851078, "timestamp": "2025-09-10 02:28:21.826352", "step": 2290, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.856428", "step": 2290, "epoch": 1 }, { "type": "loss", "content": 0.029752230271697044, "timestamp": "2025-09-10 02:28:21.858270", "step": 2291, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:21.888547", "step": 2291, "epoch": 1 }, { "type": "loss", "content": 0.01790672540664673, "timestamp": "2025-09-10 02:28:21.911910", "step": 2292, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.941686", "step": 2292, "epoch": 1 }, { "type": "loss", "content": 0.003182124812155962, "timestamp": "2025-09-10 02:28:21.943268", "step": 2293, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:21.972996", "step": 2293, "epoch": 1 }, { "type": "loss", "content": 0.035757869482040405, "timestamp": "2025-09-10 02:28:21.974715", "step": 2294, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.008895", "step": 2294, "epoch": 1 }, { "type": "loss", "content": 0.04373505339026451, "timestamp": "2025-09-10 02:28:22.014160", "step": 2295, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.057135", "step": 2295, "epoch": 1 }, { "type": "loss", "content": 0.02016693353652954, "timestamp": "2025-09-10 02:28:22.080508", "step": 2296, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.109234", "step": 2296, "epoch": 1 }, { "type": "loss", "content": 0.025444846600294113, "timestamp": "2025-09-10 02:28:22.116233", "step": 2297, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.148342", "step": 2297, "epoch": 1 }, { "type": "loss", "content": 0.018856560811400414, "timestamp": "2025-09-10 02:28:22.150694", "step": 2298, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:22.179919", "step": 2298, "epoch": 1 }, { "type": "loss", "content": 0.017691995948553085, "timestamp": "2025-09-10 02:28:22.181559", "step": 2299, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.213359", "step": 2299, "epoch": 1 }, { "type": "loss", "content": 0.02185535989701748, "timestamp": "2025-09-10 02:28:22.237290", "step": 2300, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.266702", "step": 2300, "epoch": 1 }, { "type": "loss", "content": 0.027811145409941673, "timestamp": "2025-09-10 02:28:22.268282", "step": 2301, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.297528", "step": 2301, "epoch": 1 }, { "type": "loss", "content": 0.010415504686534405, "timestamp": "2025-09-10 02:28:22.299360", "step": 2302, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.328430", "step": 2302, "epoch": 1 }, { "type": "loss", "content": 0.044645339250564575, "timestamp": "2025-09-10 02:28:22.334628", "step": 2303, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.368138", "step": 2303, "epoch": 1 }, { "type": "loss", "content": 0.044401075690984726, "timestamp": "2025-09-10 02:28:22.391698", "step": 2304, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.420430", "step": 2304, "epoch": 1 }, { "type": "loss", "content": 0.03624344989657402, "timestamp": "2025-09-10 02:28:22.424397", "step": 2305, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.455969", "step": 2305, "epoch": 1 }, { "type": "loss", "content": 0.008225697092711926, "timestamp": "2025-09-10 02:28:22.459334", "step": 2306, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.489543", "step": 2306, "epoch": 1 }, { "type": "loss", "content": 0.005880584474653006, "timestamp": "2025-09-10 02:28:22.491068", "step": 2307, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.520533", "step": 2307, "epoch": 1 }, { "type": "loss", "content": 0.017149990424513817, "timestamp": "2025-09-10 02:28:22.543728", "step": 2308, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.572790", "step": 2308, "epoch": 1 }, { "type": "loss", "content": 0.022219756618142128, "timestamp": "2025-09-10 02:28:22.575380", "step": 2309, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.604584", "step": 2309, "epoch": 1 }, { "type": "loss", "content": 0.04094681143760681, "timestamp": "2025-09-10 02:28:22.608412", "step": 2310, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.638363", "step": 2310, "epoch": 1 }, { "type": "loss", "content": 0.04218247905373573, "timestamp": "2025-09-10 02:28:22.640481", "step": 2311, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.669377", "step": 2311, "epoch": 1 }, { "type": "loss", "content": 0.015587793663144112, "timestamp": "2025-09-10 02:28:22.692837", "step": 2312, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:22.722106", "step": 2312, "epoch": 1 }, { "type": "loss", "content": 0.03662145137786865, "timestamp": "2025-09-10 02:28:22.724084", "step": 2313, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.752690", "step": 2313, "epoch": 1 }, { "type": "loss", "content": 0.04511374607682228, "timestamp": "2025-09-10 02:28:22.754660", "step": 2314, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.784034", "step": 2314, "epoch": 1 }, { "type": "loss", "content": 0.0072469934821128845, "timestamp": "2025-09-10 02:28:22.785842", "step": 2315, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:22.815703", "step": 2315, "epoch": 1 }, { "type": "loss", "content": 0.06467636674642563, "timestamp": "2025-09-10 02:28:22.843148", "step": 2316, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.872120", "step": 2316, "epoch": 1 }, { "type": "loss", "content": 0.02188717946410179, "timestamp": "2025-09-10 02:28:22.874010", "step": 2317, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.906706", "step": 2317, "epoch": 1 }, { "type": "loss", "content": 0.002025757683441043, "timestamp": "2025-09-10 02:28:22.908478", "step": 2318, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:22.937670", "step": 2318, "epoch": 1 }, { "type": "loss", "content": 0.030509835109114647, "timestamp": "2025-09-10 02:28:22.939480", "step": 2319, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:22.968050", "step": 2319, "epoch": 1 }, { "type": "loss", "content": 0.032555047422647476, "timestamp": "2025-09-10 02:28:22.991850", "step": 2320, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.020504", "step": 2320, "epoch": 1 }, { "type": "loss", "content": 0.007047518156468868, "timestamp": "2025-09-10 02:28:23.022297", "step": 2321, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.051265", "step": 2321, "epoch": 1 }, { "type": "loss", "content": 0.02716813050210476, "timestamp": "2025-09-10 02:28:23.053334", "step": 2322, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.084766", "step": 2322, "epoch": 1 }, { "type": "loss", "content": 0.010094745084643364, "timestamp": "2025-09-10 02:28:23.086305", "step": 2323, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.115075", "step": 2323, "epoch": 1 }, { "type": "loss", "content": 0.0395091250538826, "timestamp": "2025-09-10 02:28:23.140194", "step": 2324, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.173715", "step": 2324, "epoch": 1 }, { "type": "loss", "content": 0.008416331373155117, "timestamp": "2025-09-10 02:28:23.175484", "step": 2325, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.205189", "step": 2325, "epoch": 1 }, { "type": "loss", "content": 0.05678853020071983, "timestamp": "2025-09-10 02:28:23.218279", "step": 2326, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.252996", "step": 2326, "epoch": 1 }, { "type": "loss", "content": 0.021840985864400864, "timestamp": "2025-09-10 02:28:23.254588", "step": 2327, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.282882", "step": 2327, "epoch": 1 }, { "type": "loss", "content": 0.012246578000485897, "timestamp": "2025-09-10 02:28:23.305871", "step": 2328, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.335494", "step": 2328, "epoch": 1 }, { "type": "loss", "content": 0.0025970342103391886, "timestamp": "2025-09-10 02:28:23.337085", "step": 2329, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.365612", "step": 2329, "epoch": 1 }, { "type": "loss", "content": 0.014994210563600063, "timestamp": "2025-09-10 02:28:23.367058", "step": 2330, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.396160", "step": 2330, "epoch": 1 }, { "type": "loss", "content": 0.04930217191576958, "timestamp": "2025-09-10 02:28:23.398098", "step": 2331, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:23.430981", "step": 2331, "epoch": 1 }, { "type": "loss", "content": 0.03074464574456215, "timestamp": "2025-09-10 02:28:23.454377", "step": 2332, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.483111", "step": 2332, "epoch": 1 }, { "type": "loss", "content": 0.007758311927318573, "timestamp": "2025-09-10 02:28:23.484706", "step": 2333, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.512996", "step": 2333, "epoch": 1 }, { "type": "loss", "content": 0.04545729607343674, "timestamp": "2025-09-10 02:28:23.515515", "step": 2334, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:23.546808", "step": 2334, "epoch": 1 }, { "type": "loss", "content": 0.02094576694071293, "timestamp": "2025-09-10 02:28:23.548530", "step": 2335, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.576901", "step": 2335, "epoch": 1 }, { "type": "loss", "content": 0.06826934218406677, "timestamp": "2025-09-10 02:28:23.602122", "step": 2336, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.631333", "step": 2336, "epoch": 1 }, { "type": "loss", "content": 0.03085923008620739, "timestamp": "2025-09-10 02:28:23.633145", "step": 2337, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.666827", "step": 2337, "epoch": 1 }, { "type": "loss", "content": 0.030749622732400894, "timestamp": "2025-09-10 02:28:23.668335", "step": 2338, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.696322", "step": 2338, "epoch": 1 }, { "type": "loss", "content": 0.01941312849521637, "timestamp": "2025-09-10 02:28:23.698081", "step": 2339, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.726707", "step": 2339, "epoch": 1 }, { "type": "loss", "content": 0.017771733924746513, "timestamp": "2025-09-10 02:28:23.750074", "step": 2340, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.779101", "step": 2340, "epoch": 1 }, { "type": "loss", "content": 0.013400768861174583, "timestamp": "2025-09-10 02:28:23.780807", "step": 2341, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.809856", "step": 2341, "epoch": 1 }, { "type": "loss", "content": 0.0965103730559349, "timestamp": "2025-09-10 02:28:23.811538", "step": 2342, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.839893", "step": 2342, "epoch": 1 }, { "type": "loss", "content": 0.05872729793190956, "timestamp": "2025-09-10 02:28:23.841605", "step": 2343, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:23.870117", "step": 2343, "epoch": 1 }, { "type": "loss", "content": 0.016339726746082306, "timestamp": "2025-09-10 02:28:23.893358", "step": 2344, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:23.922207", "step": 2344, "epoch": 1 }, { "type": "loss", "content": 0.03597145155072212, "timestamp": "2025-09-10 02:28:23.924219", "step": 2345, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:23.953054", "step": 2345, "epoch": 1 }, { "type": "loss", "content": 0.01645306684076786, "timestamp": "2025-09-10 02:28:23.954778", "step": 2346, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:23.984742", "step": 2346, "epoch": 1 }, { "type": "loss", "content": 0.01860763132572174, "timestamp": "2025-09-10 02:28:23.986848", "step": 2347, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.022252", "step": 2347, "epoch": 1 }, { "type": "loss", "content": 0.05072319507598877, "timestamp": "2025-09-10 02:28:24.045706", "step": 2348, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:24.081360", "step": 2348, "epoch": 1 }, { "type": "loss", "content": 0.05628179386258125, "timestamp": "2025-09-10 02:28:24.083012", "step": 2349, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.111781", "step": 2349, "epoch": 1 }, { "type": "loss", "content": 0.04576278105378151, "timestamp": "2025-09-10 02:28:24.113968", "step": 2350, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.149164", "step": 2350, "epoch": 1 }, { "type": "loss", "content": 0.007431622128933668, "timestamp": "2025-09-10 02:28:24.151385", "step": 2351, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.184950", "step": 2351, "epoch": 1 }, { "type": "loss", "content": 0.006663801614195108, "timestamp": "2025-09-10 02:28:24.209918", "step": 2352, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.245976", "step": 2352, "epoch": 1 }, { "type": "loss", "content": 0.001480492064729333, "timestamp": "2025-09-10 02:28:24.247928", "step": 2353, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:24.277522", "step": 2353, "epoch": 1 }, { "type": "loss", "content": 0.07069874554872513, "timestamp": "2025-09-10 02:28:24.284399", "step": 2354, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.313133", "step": 2354, "epoch": 1 }, { "type": "loss", "content": 0.012294160202145576, "timestamp": "2025-09-10 02:28:24.316011", "step": 2355, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.346965", "step": 2355, "epoch": 1 }, { "type": "loss", "content": 0.08251728117465973, "timestamp": "2025-09-10 02:28:24.370456", "step": 2356, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:24.399111", "step": 2356, "epoch": 1 }, { "type": "loss", "content": 0.016314489766955376, "timestamp": "2025-09-10 02:28:24.406264", "step": 2357, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:24.438698", "step": 2357, "epoch": 1 }, { "type": "loss", "content": 0.01033691130578518, "timestamp": "2025-09-10 02:28:24.440832", "step": 2358, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.470377", "step": 2358, "epoch": 1 }, { "type": "loss", "content": 0.03003569133579731, "timestamp": "2025-09-10 02:28:24.472055", "step": 2359, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.500949", "step": 2359, "epoch": 1 }, { "type": "loss", "content": 0.04151889681816101, "timestamp": "2025-09-10 02:28:24.524180", "step": 2360, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.557120", "step": 2360, "epoch": 1 }, { "type": "loss", "content": 0.09053687751293182, "timestamp": "2025-09-10 02:28:24.560219", "step": 2361, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.591845", "step": 2361, "epoch": 1 }, { "type": "loss", "content": 0.03297692909836769, "timestamp": "2025-09-10 02:28:24.593591", "step": 2362, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.622366", "step": 2362, "epoch": 1 }, { "type": "loss", "content": 0.03841349110007286, "timestamp": "2025-09-10 02:28:24.624428", "step": 2363, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.663507", "step": 2363, "epoch": 1 }, { "type": "loss", "content": 0.027684299275279045, "timestamp": "2025-09-10 02:28:24.686869", "step": 2364, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.719249", "step": 2364, "epoch": 1 }, { "type": "loss", "content": 0.029074719175696373, "timestamp": "2025-09-10 02:28:24.721098", "step": 2365, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.760550", "step": 2365, "epoch": 1 }, { "type": "loss", "content": 0.029994269832968712, "timestamp": "2025-09-10 02:28:24.766371", "step": 2366, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.795107", "step": 2366, "epoch": 1 }, { "type": "loss", "content": 0.020649809390306473, "timestamp": "2025-09-10 02:28:24.799632", "step": 2367, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.828848", "step": 2367, "epoch": 1 }, { "type": "loss", "content": 0.01844174601137638, "timestamp": "2025-09-10 02:28:24.854376", "step": 2368, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.883544", "step": 2368, "epoch": 1 }, { "type": "loss", "content": 0.05636049434542656, "timestamp": "2025-09-10 02:28:24.885418", "step": 2369, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.914109", "step": 2369, "epoch": 1 }, { "type": "loss", "content": 0.020002515986561775, "timestamp": "2025-09-10 02:28:24.915931", "step": 2370, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.944417", "step": 2370, "epoch": 1 }, { "type": "loss", "content": 0.04524189978837967, "timestamp": "2025-09-10 02:28:24.946226", "step": 2371, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:24.975227", "step": 2371, "epoch": 1 }, { "type": "loss", "content": 0.010690070688724518, "timestamp": "2025-09-10 02:28:24.998592", "step": 2372, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.027703", "step": 2372, "epoch": 1 }, { "type": "loss", "content": 0.0136357257142663, "timestamp": "2025-09-10 02:28:25.029781", "step": 2373, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:25.058558", "step": 2373, "epoch": 1 }, { "type": "loss", "content": 0.052628517150878906, "timestamp": "2025-09-10 02:28:25.061751", "step": 2374, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.090202", "step": 2374, "epoch": 1 }, { "type": "loss", "content": 0.009049796499311924, "timestamp": "2025-09-10 02:28:25.095938", "step": 2375, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.126599", "step": 2375, "epoch": 1 }, { "type": "loss", "content": 0.029583189636468887, "timestamp": "2025-09-10 02:28:25.150071", "step": 2376, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:25.183891", "step": 2376, "epoch": 1 }, { "type": "loss", "content": 0.03513399139046669, "timestamp": "2025-09-10 02:28:25.185911", "step": 2377, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.216581", "step": 2377, "epoch": 1 }, { "type": "loss", "content": 0.03157564997673035, "timestamp": "2025-09-10 02:28:25.218536", "step": 2378, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.252019", "step": 2378, "epoch": 1 }, { "type": "loss", "content": 0.02360604517161846, "timestamp": "2025-09-10 02:28:25.253911", "step": 2379, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.289690", "step": 2379, "epoch": 1 }, { "type": "loss", "content": 0.018449855968356133, "timestamp": "2025-09-10 02:28:25.313095", "step": 2380, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.342042", "step": 2380, "epoch": 1 }, { "type": "loss", "content": 0.04448559135198593, "timestamp": "2025-09-10 02:28:25.351552", "step": 2381, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.380942", "step": 2381, "epoch": 1 }, { "type": "loss", "content": 0.04531589522957802, "timestamp": "2025-09-10 02:28:25.389388", "step": 2382, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:25.419050", "step": 2382, "epoch": 1 }, { "type": "loss", "content": 0.02107251062989235, "timestamp": "2025-09-10 02:28:25.421129", "step": 2383, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.449984", "step": 2383, "epoch": 1 }, { "type": "loss", "content": 0.033820513635873795, "timestamp": "2025-09-10 02:28:25.473501", "step": 2384, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.502339", "step": 2384, "epoch": 1 }, { "type": "loss", "content": 0.032583147287368774, "timestamp": "2025-09-10 02:28:25.505527", "step": 2385, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.541452", "step": 2385, "epoch": 1 }, { "type": "loss", "content": 0.0029801710043102503, "timestamp": "2025-09-10 02:28:25.546861", "step": 2386, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:25.579754", "step": 2386, "epoch": 1 }, { "type": "loss", "content": 0.02128872647881508, "timestamp": "2025-09-10 02:28:25.581502", "step": 2387, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.610420", "step": 2387, "epoch": 1 }, { "type": "loss", "content": 0.04043685272336006, "timestamp": "2025-09-10 02:28:25.634014", "step": 2388, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.663181", "step": 2388, "epoch": 1 }, { "type": "loss", "content": 0.015649553388357162, "timestamp": "2025-09-10 02:28:25.665081", "step": 2389, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.693808", "step": 2389, "epoch": 1 }, { "type": "loss", "content": 0.010012378916144371, "timestamp": "2025-09-10 02:28:25.695714", "step": 2390, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:25.724165", "step": 2390, "epoch": 1 }, { "type": "loss", "content": 0.05852745845913887, "timestamp": "2025-09-10 02:28:25.726921", "step": 2391, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.755566", "step": 2391, "epoch": 1 }, { "type": "loss", "content": 0.034893278032541275, "timestamp": "2025-09-10 02:28:25.779627", "step": 2392, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.810632", "step": 2392, "epoch": 1 }, { "type": "loss", "content": 0.016062278300523758, "timestamp": "2025-09-10 02:28:25.814072", "step": 2393, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.843631", "step": 2393, "epoch": 1 }, { "type": "loss", "content": 0.05764563009142876, "timestamp": "2025-09-10 02:28:25.845531", "step": 2394, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.873863", "step": 2394, "epoch": 1 }, { "type": "loss", "content": 0.01467811968177557, "timestamp": "2025-09-10 02:28:25.875926", "step": 2395, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.904767", "step": 2395, "epoch": 1 }, { "type": "loss", "content": 0.06349930167198181, "timestamp": "2025-09-10 02:28:25.928149", "step": 2396, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:25.962545", "step": 2396, "epoch": 1 }, { "type": "loss", "content": 0.029672754928469658, "timestamp": "2025-09-10 02:28:25.964580", "step": 2397, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:25.994663", "step": 2397, "epoch": 1 }, { "type": "loss", "content": 0.039500582963228226, "timestamp": "2025-09-10 02:28:26.007527", "step": 2398, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.042457", "step": 2398, "epoch": 1 }, { "type": "loss", "content": 0.02818789891898632, "timestamp": "2025-09-10 02:28:26.049377", "step": 2399, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:26.079190", "step": 2399, "epoch": 1 }, { "type": "loss", "content": 0.028065938502550125, "timestamp": "2025-09-10 02:28:26.107328", "step": 2400, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.136251", "step": 2400, "epoch": 1 }, { "type": "loss", "content": 0.021312285214662552, "timestamp": "2025-09-10 02:28:26.138140", "step": 2401, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.166999", "step": 2401, "epoch": 1 }, { "type": "loss", "content": 0.0122200483456254, "timestamp": "2025-09-10 02:28:26.178257", "step": 2402, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.221337", "step": 2402, "epoch": 1 }, { "type": "loss", "content": 0.02185259386897087, "timestamp": "2025-09-10 02:28:26.223343", "step": 2403, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.252427", "step": 2403, "epoch": 1 }, { "type": "loss", "content": 0.031530894339084625, "timestamp": "2025-09-10 02:28:26.275946", "step": 2404, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:26.305987", "step": 2404, "epoch": 1 }, { "type": "loss", "content": 0.026655616238713264, "timestamp": "2025-09-10 02:28:26.307787", "step": 2405, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.336449", "step": 2405, "epoch": 1 }, { "type": "loss", "content": 0.019431522116065025, "timestamp": "2025-09-10 02:28:26.338237", "step": 2406, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.367638", "step": 2406, "epoch": 1 }, { "type": "loss", "content": 0.016600485891103745, "timestamp": "2025-09-10 02:28:26.373672", "step": 2407, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.403809", "step": 2407, "epoch": 1 }, { "type": "loss", "content": 0.03890708088874817, "timestamp": "2025-09-10 02:28:26.427366", "step": 2408, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.456694", "step": 2408, "epoch": 1 }, { "type": "loss", "content": 0.03087475337088108, "timestamp": "2025-09-10 02:28:26.460779", "step": 2409, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.491110", "step": 2409, "epoch": 1 }, { "type": "loss", "content": 0.05119401216506958, "timestamp": "2025-09-10 02:28:26.492827", "step": 2410, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.521324", "step": 2410, "epoch": 1 }, { "type": "loss", "content": 0.026257485151290894, "timestamp": "2025-09-10 02:28:26.523896", "step": 2411, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.552429", "step": 2411, "epoch": 1 }, { "type": "loss", "content": 0.04165105149149895, "timestamp": "2025-09-10 02:28:26.575994", "step": 2412, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.605181", "step": 2412, "epoch": 1 }, { "type": "loss", "content": 0.018239473924040794, "timestamp": "2025-09-10 02:28:26.607228", "step": 2413, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.635981", "step": 2413, "epoch": 1 }, { "type": "loss", "content": 0.06883590668439865, "timestamp": "2025-09-10 02:28:26.640361", "step": 2414, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.669169", "step": 2414, "epoch": 1 }, { "type": "loss", "content": 0.006214165594428778, "timestamp": "2025-09-10 02:28:26.672815", "step": 2415, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.703564", "step": 2415, "epoch": 1 }, { "type": "loss", "content": 0.013146909885108471, "timestamp": "2025-09-10 02:28:26.727396", "step": 2416, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.756980", "step": 2416, "epoch": 1 }, { "type": "loss", "content": 0.004848008044064045, "timestamp": "2025-09-10 02:28:26.758940", "step": 2417, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.787498", "step": 2417, "epoch": 1 }, { "type": "loss", "content": 0.017544735223054886, "timestamp": "2025-09-10 02:28:26.791740", "step": 2418, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.826304", "step": 2418, "epoch": 1 }, { "type": "loss", "content": 0.03728755936026573, "timestamp": "2025-09-10 02:28:26.831474", "step": 2419, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.869022", "step": 2419, "epoch": 1 }, { "type": "loss", "content": 0.05490528419613838, "timestamp": "2025-09-10 02:28:26.894754", "step": 2420, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.923482", "step": 2420, "epoch": 1 }, { "type": "loss", "content": 0.025458218529820442, "timestamp": "2025-09-10 02:28:26.927694", "step": 2421, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.959886", "step": 2421, "epoch": 1 }, { "type": "loss", "content": 0.024190718308091164, "timestamp": "2025-09-10 02:28:26.961622", "step": 2422, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:26.992026", "step": 2422, "epoch": 1 }, { "type": "loss", "content": 0.02364364080131054, "timestamp": "2025-09-10 02:28:26.993781", "step": 2423, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.022244", "step": 2423, "epoch": 1 }, { "type": "loss", "content": 0.013525965623557568, "timestamp": "2025-09-10 02:28:27.045516", "step": 2424, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.074357", "step": 2424, "epoch": 1 }, { "type": "loss", "content": 0.048916157335042953, "timestamp": "2025-09-10 02:28:27.076086", "step": 2425, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.105435", "step": 2425, "epoch": 1 }, { "type": "loss", "content": 0.018245337530970573, "timestamp": "2025-09-10 02:28:27.107376", "step": 2426, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.136046", "step": 2426, "epoch": 1 }, { "type": "loss", "content": 0.03028123266994953, "timestamp": "2025-09-10 02:28:27.138698", "step": 2427, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.171574", "step": 2427, "epoch": 1 }, { "type": "loss", "content": 0.04529779404401779, "timestamp": "2025-09-10 02:28:27.194833", "step": 2428, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.227138", "step": 2428, "epoch": 1 }, { "type": "loss", "content": 0.04372987896203995, "timestamp": "2025-09-10 02:28:27.228936", "step": 2429, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.257997", "step": 2429, "epoch": 1 }, { "type": "loss", "content": 0.04191436618566513, "timestamp": "2025-09-10 02:28:27.260828", "step": 2430, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.289611", "step": 2430, "epoch": 1 }, { "type": "loss", "content": 0.05567625164985657, "timestamp": "2025-09-10 02:28:27.291373", "step": 2431, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:27.320078", "step": 2431, "epoch": 1 }, { "type": "loss", "content": 0.033835552632808685, "timestamp": "2025-09-10 02:28:27.343919", "step": 2432, "epoch": 1 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:28:29.386312", "step": 2432, "epoch": 1 }, { "type": "pplx", "content": 2158993.796437208, "timestamp": "2025-09-10 02:28:29.388121", "step": 2432, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.417023", "step": 2432, "epoch": 1 }, { "type": "loss", "content": 0.016373800113797188, "timestamp": "2025-09-10 02:28:29.418840", "step": 2433, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.448004", "step": 2433, "epoch": 1 }, { "type": "loss", "content": 0.021115172654390335, "timestamp": "2025-09-10 02:28:29.450131", "step": 2434, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.479141", "step": 2434, "epoch": 1 }, { "type": "loss", "content": 0.02003416419029236, "timestamp": "2025-09-10 02:28:29.485171", "step": 2435, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 1, 80 ], "flops": 593517404912 }, "timestamp": "2025-09-10 02:28:29.587739", "step": 2435, "epoch": 1 }, { "type": "loss", "content": 0.04149497672915459, "timestamp": "2025-09-10 02:28:29.612980", "step": 2436, "epoch": 1 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.642893", "step": 2436, "epoch": 2 }, { "type": "loss", "content": 0.030175983905792236, "timestamp": "2025-09-10 02:28:29.649266", "step": 2437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.680927", "step": 2437, "epoch": 2 }, { "type": "loss", "content": 0.02488701418042183, "timestamp": "2025-09-10 02:28:29.683191", "step": 2438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.717322", "step": 2438, "epoch": 2 }, { "type": "loss", "content": 0.01973804645240307, "timestamp": "2025-09-10 02:28:29.722599", "step": 2439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.751533", "step": 2439, "epoch": 2 }, { "type": "loss", "content": 0.05977180227637291, "timestamp": "2025-09-10 02:28:29.774911", "step": 2440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.807294", "step": 2440, "epoch": 2 }, { "type": "loss", "content": 0.033726807683706284, "timestamp": "2025-09-10 02:28:29.809192", "step": 2441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.843227", "step": 2441, "epoch": 2 }, { "type": "loss", "content": 0.004302346147596836, "timestamp": "2025-09-10 02:28:29.845166", "step": 2442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.874085", "step": 2442, "epoch": 2 }, { "type": "loss", "content": 0.0188862644135952, "timestamp": "2025-09-10 02:28:29.878979", "step": 2443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:29.908311", "step": 2443, "epoch": 2 }, { "type": "loss", "content": 0.02564084902405739, "timestamp": "2025-09-10 02:28:29.931744", "step": 2444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:29.974981", "step": 2444, "epoch": 2 }, { "type": "loss", "content": 0.028563078492879868, "timestamp": "2025-09-10 02:28:29.983528", "step": 2445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:30.013267", "step": 2445, "epoch": 2 }, { "type": "loss", "content": 0.020576000213623047, "timestamp": "2025-09-10 02:28:30.015658", "step": 2446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.045698", "step": 2446, "epoch": 2 }, { "type": "loss", "content": 0.037143293768167496, "timestamp": "2025-09-10 02:28:30.047495", "step": 2447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.078790", "step": 2447, "epoch": 2 }, { "type": "loss", "content": 0.02041972242295742, "timestamp": "2025-09-10 02:28:30.102354", "step": 2448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:30.144929", "step": 2448, "epoch": 2 }, { "type": "loss", "content": 0.03853491321206093, "timestamp": "2025-09-10 02:28:30.149989", "step": 2449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.181165", "step": 2449, "epoch": 2 }, { "type": "loss", "content": 0.02254076674580574, "timestamp": "2025-09-10 02:28:30.185583", "step": 2450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.218234", "step": 2450, "epoch": 2 }, { "type": "loss", "content": 0.016871871426701546, "timestamp": "2025-09-10 02:28:30.230115", "step": 2451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.263766", "step": 2451, "epoch": 2 }, { "type": "loss", "content": 0.05091606453061104, "timestamp": "2025-09-10 02:28:30.287121", "step": 2452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.316190", "step": 2452, "epoch": 2 }, { "type": "loss", "content": 0.016244972124695778, "timestamp": "2025-09-10 02:28:30.318160", "step": 2453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:30.347622", "step": 2453, "epoch": 2 }, { "type": "loss", "content": 0.008798770606517792, "timestamp": "2025-09-10 02:28:30.349421", "step": 2454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.379325", "step": 2454, "epoch": 2 }, { "type": "loss", "content": 0.026254108175635338, "timestamp": "2025-09-10 02:28:30.381386", "step": 2455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.413252", "step": 2455, "epoch": 2 }, { "type": "loss", "content": 0.01920224539935589, "timestamp": "2025-09-10 02:28:30.436701", "step": 2456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:30.470102", "step": 2456, "epoch": 2 }, { "type": "loss", "content": 0.013258195482194424, "timestamp": "2025-09-10 02:28:30.472753", "step": 2457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.501925", "step": 2457, "epoch": 2 }, { "type": "loss", "content": 0.017449038103222847, "timestamp": "2025-09-10 02:28:30.504111", "step": 2458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.532870", "step": 2458, "epoch": 2 }, { "type": "loss", "content": 0.032133761793375015, "timestamp": "2025-09-10 02:28:30.534766", "step": 2459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.563625", "step": 2459, "epoch": 2 }, { "type": "loss", "content": 0.01655067689716816, "timestamp": "2025-09-10 02:28:30.587113", "step": 2460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.616780", "step": 2460, "epoch": 2 }, { "type": "loss", "content": 0.013599888421595097, "timestamp": "2025-09-10 02:28:30.618367", "step": 2461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.647589", "step": 2461, "epoch": 2 }, { "type": "loss", "content": 0.017768505960702896, "timestamp": "2025-09-10 02:28:30.649582", "step": 2462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:30.678235", "step": 2462, "epoch": 2 }, { "type": "loss", "content": 0.004869905766099691, "timestamp": "2025-09-10 02:28:30.680075", "step": 2463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.708636", "step": 2463, "epoch": 2 }, { "type": "loss", "content": 0.018566574901342392, "timestamp": "2025-09-10 02:28:30.732604", "step": 2464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.763003", "step": 2464, "epoch": 2 }, { "type": "loss", "content": 0.0010703759035095572, "timestamp": "2025-09-10 02:28:30.765249", "step": 2465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.794831", "step": 2465, "epoch": 2 }, { "type": "loss", "content": 0.015269110910594463, "timestamp": "2025-09-10 02:28:30.799862", "step": 2466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.835106", "step": 2466, "epoch": 2 }, { "type": "loss", "content": 0.02452816627919674, "timestamp": "2025-09-10 02:28:30.836868", "step": 2467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.866402", "step": 2467, "epoch": 2 }, { "type": "loss", "content": 0.02096046321094036, "timestamp": "2025-09-10 02:28:30.890075", "step": 2468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.923340", "step": 2468, "epoch": 2 }, { "type": "loss", "content": 0.037257127463817596, "timestamp": "2025-09-10 02:28:30.929124", "step": 2469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:30.966046", "step": 2469, "epoch": 2 }, { "type": "loss", "content": 0.028221143409609795, "timestamp": "2025-09-10 02:28:30.967798", "step": 2470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:30.998221", "step": 2470, "epoch": 2 }, { "type": "loss", "content": 0.01037190668284893, "timestamp": "2025-09-10 02:28:31.000282", "step": 2471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.031593", "step": 2471, "epoch": 2 }, { "type": "loss", "content": 0.00449879327788949, "timestamp": "2025-09-10 02:28:31.060190", "step": 2472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.089742", "step": 2472, "epoch": 2 }, { "type": "loss", "content": 0.008812216110527515, "timestamp": "2025-09-10 02:28:31.091787", "step": 2473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.124476", "step": 2473, "epoch": 2 }, { "type": "loss", "content": 0.00666913902387023, "timestamp": "2025-09-10 02:28:31.129592", "step": 2474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.158828", "step": 2474, "epoch": 2 }, { "type": "loss", "content": 0.008816850371658802, "timestamp": "2025-09-10 02:28:31.161132", "step": 2475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.190080", "step": 2475, "epoch": 2 }, { "type": "loss", "content": 0.01382219884544611, "timestamp": "2025-09-10 02:28:31.220662", "step": 2476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.253306", "step": 2476, "epoch": 2 }, { "type": "loss", "content": 0.028483625501394272, "timestamp": "2025-09-10 02:28:31.255379", "step": 2477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:31.284350", "step": 2477, "epoch": 2 }, { "type": "loss", "content": 0.0031778302509337664, "timestamp": "2025-09-10 02:28:31.286321", "step": 2478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.316018", "step": 2478, "epoch": 2 }, { "type": "loss", "content": 0.005270532798022032, "timestamp": "2025-09-10 02:28:31.321055", "step": 2479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.350572", "step": 2479, "epoch": 2 }, { "type": "loss", "content": 0.046177320182323456, "timestamp": "2025-09-10 02:28:31.374143", "step": 2480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.411092", "step": 2480, "epoch": 2 }, { "type": "loss", "content": 0.004339002072811127, "timestamp": "2025-09-10 02:28:31.417184", "step": 2481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:31.458815", "step": 2481, "epoch": 2 }, { "type": "loss", "content": 0.014661594294011593, "timestamp": "2025-09-10 02:28:31.460704", "step": 2482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.496032", "step": 2482, "epoch": 2 }, { "type": "loss", "content": 0.0027640238404273987, "timestamp": "2025-09-10 02:28:31.497802", "step": 2483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.526533", "step": 2483, "epoch": 2 }, { "type": "loss", "content": 0.0363384447991848, "timestamp": "2025-09-10 02:28:31.551783", "step": 2484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.584971", "step": 2484, "epoch": 2 }, { "type": "loss", "content": 0.05729368329048157, "timestamp": "2025-09-10 02:28:31.586902", "step": 2485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:31.615481", "step": 2485, "epoch": 2 }, { "type": "loss", "content": 0.017933333292603493, "timestamp": "2025-09-10 02:28:31.617459", "step": 2486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:31.646426", "step": 2486, "epoch": 2 }, { "type": "loss", "content": 0.04761345311999321, "timestamp": "2025-09-10 02:28:31.648638", "step": 2487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:31.677304", "step": 2487, "epoch": 2 }, { "type": "loss", "content": 0.021747812628746033, "timestamp": "2025-09-10 02:28:31.700856", "step": 2488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:31.729450", "step": 2488, "epoch": 2 }, { "type": "loss", "content": 0.09620558470487595, "timestamp": "2025-09-10 02:28:31.731191", "step": 2489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.760121", "step": 2489, "epoch": 2 }, { "type": "loss", "content": 0.0060876780189573765, "timestamp": "2025-09-10 02:28:31.767275", "step": 2490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.802050", "step": 2490, "epoch": 2 }, { "type": "loss", "content": 0.007496859412640333, "timestamp": "2025-09-10 02:28:31.803835", "step": 2491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:31.832695", "step": 2491, "epoch": 2 }, { "type": "loss", "content": 0.038308579474687576, "timestamp": "2025-09-10 02:28:31.855914", "step": 2492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.884613", "step": 2492, "epoch": 2 }, { "type": "loss", "content": 0.011420799419283867, "timestamp": "2025-09-10 02:28:31.886518", "step": 2493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.920924", "step": 2493, "epoch": 2 }, { "type": "loss", "content": 0.027831334620714188, "timestamp": "2025-09-10 02:28:31.923020", "step": 2494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:31.956075", "step": 2494, "epoch": 2 }, { "type": "loss", "content": 0.0011441315291449428, "timestamp": "2025-09-10 02:28:31.961311", "step": 2495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:31.997411", "step": 2495, "epoch": 2 }, { "type": "loss", "content": 0.06489991396665573, "timestamp": "2025-09-10 02:28:32.020942", "step": 2496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:32.050581", "step": 2496, "epoch": 2 }, { "type": "loss", "content": 0.0068673426285386086, "timestamp": "2025-09-10 02:28:32.052444", "step": 2497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:32.081635", "step": 2497, "epoch": 2 }, { "type": "loss", "content": 0.01522838231176138, "timestamp": "2025-09-10 02:28:32.083515", "step": 2498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:32.118377", "step": 2498, "epoch": 2 }, { "type": "loss", "content": 0.04047464206814766, "timestamp": "2025-09-10 02:28:32.121851", "step": 2499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:32.152023", "step": 2499, "epoch": 2 }, { "type": "loss", "content": 0.028804613277316093, "timestamp": "2025-09-10 02:28:32.175625", "step": 2500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 2500", "timestamp": "2025-09-10 02:28:36.661355", "step": 2500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:36.699811", "step": 2500, "epoch": 2 }, { "type": "loss", "content": 0.04566410928964615, "timestamp": "2025-09-10 02:28:36.701594", "step": 2501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:36.730919", "step": 2501, "epoch": 2 }, { "type": "loss", "content": 0.0038940771482884884, "timestamp": "2025-09-10 02:28:36.733940", "step": 2502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:36.764923", "step": 2502, "epoch": 2 }, { "type": "loss", "content": 0.005365054588764906, "timestamp": "2025-09-10 02:28:36.767684", "step": 2503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:36.796559", "step": 2503, "epoch": 2 }, { "type": "loss", "content": 0.008775746449828148, "timestamp": "2025-09-10 02:28:36.820387", "step": 2504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:36.851528", "step": 2504, "epoch": 2 }, { "type": "loss", "content": 0.06777717918157578, "timestamp": "2025-09-10 02:28:36.853341", "step": 2505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:36.882745", "step": 2505, "epoch": 2 }, { "type": "loss", "content": 0.016283374279737473, "timestamp": "2025-09-10 02:28:36.884678", "step": 2506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:36.913400", "step": 2506, "epoch": 2 }, { "type": "loss", "content": 0.025961117818951607, "timestamp": "2025-09-10 02:28:36.915247", "step": 2507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:36.944639", "step": 2507, "epoch": 2 }, { "type": "loss", "content": 0.015359207056462765, "timestamp": "2025-09-10 02:28:36.968379", "step": 2508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:36.997144", "step": 2508, "epoch": 2 }, { "type": "loss", "content": 0.024218466132879257, "timestamp": "2025-09-10 02:28:36.998853", "step": 2509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.027321", "step": 2509, "epoch": 2 }, { "type": "loss", "content": 0.023411747068166733, "timestamp": "2025-09-10 02:28:37.029001", "step": 2510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:37.057988", "step": 2510, "epoch": 2 }, { "type": "loss", "content": 0.060719482600688934, "timestamp": "2025-09-10 02:28:37.059829", "step": 2511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.088745", "step": 2511, "epoch": 2 }, { "type": "loss", "content": 0.008819801732897758, "timestamp": "2025-09-10 02:28:37.112095", "step": 2512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.140864", "step": 2512, "epoch": 2 }, { "type": "loss", "content": 0.03373945131897926, "timestamp": "2025-09-10 02:28:37.142889", "step": 2513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.171714", "step": 2513, "epoch": 2 }, { "type": "loss", "content": 0.03358836844563484, "timestamp": "2025-09-10 02:28:37.173547", "step": 2514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:37.202581", "step": 2514, "epoch": 2 }, { "type": "loss", "content": 0.03507380932569504, "timestamp": "2025-09-10 02:28:37.204262", "step": 2515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.232727", "step": 2515, "epoch": 2 }, { "type": "loss", "content": 0.006715219002217054, "timestamp": "2025-09-10 02:28:37.256120", "step": 2516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:37.284867", "step": 2516, "epoch": 2 }, { "type": "loss", "content": 0.009967942722141743, "timestamp": "2025-09-10 02:28:37.286783", "step": 2517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.315528", "step": 2517, "epoch": 2 }, { "type": "loss", "content": 0.05242612957954407, "timestamp": "2025-09-10 02:28:37.317284", "step": 2518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.346105", "step": 2518, "epoch": 2 }, { "type": "loss", "content": 0.01925036497414112, "timestamp": "2025-09-10 02:28:37.347928", "step": 2519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.376272", "step": 2519, "epoch": 2 }, { "type": "loss", "content": 0.04645388573408127, "timestamp": "2025-09-10 02:28:37.399605", "step": 2520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.428407", "step": 2520, "epoch": 2 }, { "type": "loss", "content": 0.018919702619314194, "timestamp": "2025-09-10 02:28:37.430141", "step": 2521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.458550", "step": 2521, "epoch": 2 }, { "type": "loss", "content": 0.038673412054777145, "timestamp": "2025-09-10 02:28:37.460352", "step": 2522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.489229", "step": 2522, "epoch": 2 }, { "type": "loss", "content": 0.028117135167121887, "timestamp": "2025-09-10 02:28:37.491089", "step": 2523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.519640", "step": 2523, "epoch": 2 }, { "type": "loss", "content": 0.01898709125816822, "timestamp": "2025-09-10 02:28:37.542858", "step": 2524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.571692", "step": 2524, "epoch": 2 }, { "type": "loss", "content": 0.007139905821532011, "timestamp": "2025-09-10 02:28:37.573295", "step": 2525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:37.602185", "step": 2525, "epoch": 2 }, { "type": "loss", "content": 0.021932478994131088, "timestamp": "2025-09-10 02:28:37.604436", "step": 2526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:37.632945", "step": 2526, "epoch": 2 }, { "type": "loss", "content": 0.030555009841918945, "timestamp": "2025-09-10 02:28:37.634568", "step": 2527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.663346", "step": 2527, "epoch": 2 }, { "type": "loss", "content": 0.022133933380246162, "timestamp": "2025-09-10 02:28:37.686654", "step": 2528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.715622", "step": 2528, "epoch": 2 }, { "type": "loss", "content": 0.020784815773367882, "timestamp": "2025-09-10 02:28:37.717292", "step": 2529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.747072", "step": 2529, "epoch": 2 }, { "type": "loss", "content": 0.018027016893029213, "timestamp": "2025-09-10 02:28:37.748787", "step": 2530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.777372", "step": 2530, "epoch": 2 }, { "type": "loss", "content": 0.04622536525130272, "timestamp": "2025-09-10 02:28:37.780375", "step": 2531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.810166", "step": 2531, "epoch": 2 }, { "type": "loss", "content": 0.01820000819861889, "timestamp": "2025-09-10 02:28:37.833434", "step": 2532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:37.862635", "step": 2532, "epoch": 2 }, { "type": "loss", "content": 0.027822909876704216, "timestamp": "2025-09-10 02:28:37.864286", "step": 2533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.893185", "step": 2533, "epoch": 2 }, { "type": "loss", "content": 0.0055721839889883995, "timestamp": "2025-09-10 02:28:37.895084", "step": 2534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:37.923962", "step": 2534, "epoch": 2 }, { "type": "loss", "content": 0.020961089059710503, "timestamp": "2025-09-10 02:28:37.925672", "step": 2535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:37.954699", "step": 2535, "epoch": 2 }, { "type": "loss", "content": 0.015716660767793655, "timestamp": "2025-09-10 02:28:37.978075", "step": 2536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.006857", "step": 2536, "epoch": 2 }, { "type": "loss", "content": 0.02635492943227291, "timestamp": "2025-09-10 02:28:38.008896", "step": 2537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.037279", "step": 2537, "epoch": 2 }, { "type": "loss", "content": 0.019367411732673645, "timestamp": "2025-09-10 02:28:38.038968", "step": 2538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:38.067699", "step": 2538, "epoch": 2 }, { "type": "loss", "content": 0.03078184649348259, "timestamp": "2025-09-10 02:28:38.069771", "step": 2539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.098866", "step": 2539, "epoch": 2 }, { "type": "loss", "content": 0.006509651895612478, "timestamp": "2025-09-10 02:28:38.122395", "step": 2540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:38.151574", "step": 2540, "epoch": 2 }, { "type": "loss", "content": 0.006312340032309294, "timestamp": "2025-09-10 02:28:38.153223", "step": 2541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.183136", "step": 2541, "epoch": 2 }, { "type": "loss", "content": 0.007816670462489128, "timestamp": "2025-09-10 02:28:38.185007", "step": 2542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.213801", "step": 2542, "epoch": 2 }, { "type": "loss", "content": 0.040647126734256744, "timestamp": "2025-09-10 02:28:38.215655", "step": 2543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.244169", "step": 2543, "epoch": 2 }, { "type": "loss", "content": 0.020041000097990036, "timestamp": "2025-09-10 02:28:38.267372", "step": 2544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.295950", "step": 2544, "epoch": 2 }, { "type": "loss", "content": 0.006902061402797699, "timestamp": "2025-09-10 02:28:38.297706", "step": 2545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.325988", "step": 2545, "epoch": 2 }, { "type": "loss", "content": 0.016499994322657585, "timestamp": "2025-09-10 02:28:38.327784", "step": 2546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:38.356411", "step": 2546, "epoch": 2 }, { "type": "loss", "content": 0.050838652998209, "timestamp": "2025-09-10 02:28:38.358254", "step": 2547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.386601", "step": 2547, "epoch": 2 }, { "type": "loss", "content": 0.013835342600941658, "timestamp": "2025-09-10 02:28:38.409806", "step": 2548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.438283", "step": 2548, "epoch": 2 }, { "type": "loss", "content": 0.0341733880341053, "timestamp": "2025-09-10 02:28:38.440247", "step": 2549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:38.469709", "step": 2549, "epoch": 2 }, { "type": "loss", "content": 0.00761485705152154, "timestamp": "2025-09-10 02:28:38.471456", "step": 2550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.500935", "step": 2550, "epoch": 2 }, { "type": "loss", "content": 0.04974096640944481, "timestamp": "2025-09-10 02:28:38.502957", "step": 2551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.532220", "step": 2551, "epoch": 2 }, { "type": "loss", "content": 0.013284144923090935, "timestamp": "2025-09-10 02:28:38.555584", "step": 2552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:38.585107", "step": 2552, "epoch": 2 }, { "type": "loss", "content": 0.04868572577834129, "timestamp": "2025-09-10 02:28:38.586846", "step": 2553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.616993", "step": 2553, "epoch": 2 }, { "type": "loss", "content": 0.006279000546783209, "timestamp": "2025-09-10 02:28:38.618720", "step": 2554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.647613", "step": 2554, "epoch": 2 }, { "type": "loss", "content": 0.009842381812632084, "timestamp": "2025-09-10 02:28:38.649217", "step": 2555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:38.677294", "step": 2555, "epoch": 2 }, { "type": "loss", "content": 0.007323114667087793, "timestamp": "2025-09-10 02:28:38.700624", "step": 2556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.730470", "step": 2556, "epoch": 2 }, { "type": "loss", "content": 0.0027403663843870163, "timestamp": "2025-09-10 02:28:38.732345", "step": 2557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.761195", "step": 2557, "epoch": 2 }, { "type": "loss", "content": 0.007774870377033949, "timestamp": "2025-09-10 02:28:38.763080", "step": 2558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.791931", "step": 2558, "epoch": 2 }, { "type": "loss", "content": 0.02943340130150318, "timestamp": "2025-09-10 02:28:38.793972", "step": 2559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.822712", "step": 2559, "epoch": 2 }, { "type": "loss", "content": 0.008991079404950142, "timestamp": "2025-09-10 02:28:38.846010", "step": 2560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.875309", "step": 2560, "epoch": 2 }, { "type": "loss", "content": 0.06065422296524048, "timestamp": "2025-09-10 02:28:38.877038", "step": 2561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.906036", "step": 2561, "epoch": 2 }, { "type": "loss", "content": 0.003688907716423273, "timestamp": "2025-09-10 02:28:38.907890", "step": 2562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.936639", "step": 2562, "epoch": 2 }, { "type": "loss", "content": 0.034662097692489624, "timestamp": "2025-09-10 02:28:38.938346", "step": 2563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:38.966914", "step": 2563, "epoch": 2 }, { "type": "loss", "content": 0.015825720503926277, "timestamp": "2025-09-10 02:28:38.990013", "step": 2564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.018690", "step": 2564, "epoch": 2 }, { "type": "loss", "content": 0.00597797054797411, "timestamp": "2025-09-10 02:28:39.020777", "step": 2565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.049590", "step": 2565, "epoch": 2 }, { "type": "loss", "content": 0.0023908796720206738, "timestamp": "2025-09-10 02:28:39.051192", "step": 2566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:39.079745", "step": 2566, "epoch": 2 }, { "type": "loss", "content": 0.007320891600102186, "timestamp": "2025-09-10 02:28:39.081257", "step": 2567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.110070", "step": 2567, "epoch": 2 }, { "type": "loss", "content": 0.006848949007689953, "timestamp": "2025-09-10 02:28:39.133209", "step": 2568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.161759", "step": 2568, "epoch": 2 }, { "type": "loss", "content": 0.004966162610799074, "timestamp": "2025-09-10 02:28:39.163564", "step": 2569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.192309", "step": 2569, "epoch": 2 }, { "type": "loss", "content": 0.0005787332192994654, "timestamp": "2025-09-10 02:28:39.193984", "step": 2570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.222918", "step": 2570, "epoch": 2 }, { "type": "loss", "content": 0.01637382246553898, "timestamp": "2025-09-10 02:28:39.224654", "step": 2571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.253226", "step": 2571, "epoch": 2 }, { "type": "loss", "content": 0.016918400302529335, "timestamp": "2025-09-10 02:28:39.276688", "step": 2572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:39.305813", "step": 2572, "epoch": 2 }, { "type": "loss", "content": 0.018513483926653862, "timestamp": "2025-09-10 02:28:39.307489", "step": 2573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.336169", "step": 2573, "epoch": 2 }, { "type": "loss", "content": 0.007836680859327316, "timestamp": "2025-09-10 02:28:39.337775", "step": 2574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:39.366291", "step": 2574, "epoch": 2 }, { "type": "loss", "content": 0.012387419119477272, "timestamp": "2025-09-10 02:28:39.367963", "step": 2575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.396554", "step": 2575, "epoch": 2 }, { "type": "loss", "content": 0.006354599259793758, "timestamp": "2025-09-10 02:28:39.419660", "step": 2576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.448347", "step": 2576, "epoch": 2 }, { "type": "loss", "content": 0.0026027504354715347, "timestamp": "2025-09-10 02:28:39.450156", "step": 2577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.478869", "step": 2577, "epoch": 2 }, { "type": "loss", "content": 0.0239602942019701, "timestamp": "2025-09-10 02:28:39.480543", "step": 2578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.509059", "step": 2578, "epoch": 2 }, { "type": "loss", "content": 0.0015381601406261325, "timestamp": "2025-09-10 02:28:39.511840", "step": 2579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.541531", "step": 2579, "epoch": 2 }, { "type": "loss", "content": 0.032466478645801544, "timestamp": "2025-09-10 02:28:39.564925", "step": 2580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.594126", "step": 2580, "epoch": 2 }, { "type": "loss", "content": 0.009972876869142056, "timestamp": "2025-09-10 02:28:39.595801", "step": 2581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.624394", "step": 2581, "epoch": 2 }, { "type": "loss", "content": 0.01930398680269718, "timestamp": "2025-09-10 02:28:39.626111", "step": 2582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.655706", "step": 2582, "epoch": 2 }, { "type": "loss", "content": 0.010050845332443714, "timestamp": "2025-09-10 02:28:39.657625", "step": 2583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:39.685981", "step": 2583, "epoch": 2 }, { "type": "loss", "content": 0.011527531780302525, "timestamp": "2025-09-10 02:28:39.709309", "step": 2584, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:28:41.597481", "step": 2584, "epoch": 2 }, { "type": "pplx", "content": 2603673.316314668, "timestamp": "2025-09-10 02:28:41.608467", "step": 2584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:41.638879", "step": 2584, "epoch": 2 }, { "type": "loss", "content": 0.0049915858544409275, "timestamp": "2025-09-10 02:28:41.647162", "step": 2585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:41.685707", "step": 2585, "epoch": 2 }, { "type": "loss", "content": 0.0036541339941322803, "timestamp": "2025-09-10 02:28:41.687624", "step": 2586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:41.737483", "step": 2586, "epoch": 2 }, { "type": "loss", "content": 0.011626863852143288, "timestamp": "2025-09-10 02:28:41.739392", "step": 2587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:41.768215", "step": 2587, "epoch": 2 }, { "type": "loss", "content": 0.00713756587356329, "timestamp": "2025-09-10 02:28:41.791820", "step": 2588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:41.821547", "step": 2588, "epoch": 2 }, { "type": "loss", "content": 0.03513413295149803, "timestamp": "2025-09-10 02:28:41.823161", "step": 2589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:41.851579", "step": 2589, "epoch": 2 }, { "type": "loss", "content": 0.03290729597210884, "timestamp": "2025-09-10 02:28:41.853235", "step": 2590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:41.887869", "step": 2590, "epoch": 2 }, { "type": "loss", "content": 0.011574178002774715, "timestamp": "2025-09-10 02:28:41.889489", "step": 2591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:41.926275", "step": 2591, "epoch": 2 }, { "type": "loss", "content": 0.03582843765616417, "timestamp": "2025-09-10 02:28:41.949708", "step": 2592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:41.979412", "step": 2592, "epoch": 2 }, { "type": "loss", "content": 0.007640053052455187, "timestamp": "2025-09-10 02:28:41.981192", "step": 2593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:42.010940", "step": 2593, "epoch": 2 }, { "type": "loss", "content": 0.0010189699241891503, "timestamp": "2025-09-10 02:28:42.013409", "step": 2594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.043256", "step": 2594, "epoch": 2 }, { "type": "loss", "content": 0.01183264423161745, "timestamp": "2025-09-10 02:28:42.046844", "step": 2595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.076579", "step": 2595, "epoch": 2 }, { "type": "loss", "content": 0.024438226595520973, "timestamp": "2025-09-10 02:28:42.099875", "step": 2596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.128732", "step": 2596, "epoch": 2 }, { "type": "loss", "content": 0.0017751975683495402, "timestamp": "2025-09-10 02:28:42.130670", "step": 2597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.158966", "step": 2597, "epoch": 2 }, { "type": "loss", "content": 0.009779882617294788, "timestamp": "2025-09-10 02:28:42.160781", "step": 2598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.189417", "step": 2598, "epoch": 2 }, { "type": "loss", "content": 0.010804369114339352, "timestamp": "2025-09-10 02:28:42.191848", "step": 2599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.220605", "step": 2599, "epoch": 2 }, { "type": "loss", "content": 0.008427734486758709, "timestamp": "2025-09-10 02:28:42.243693", "step": 2600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:42.278359", "step": 2600, "epoch": 2 }, { "type": "loss", "content": 0.009223179891705513, "timestamp": "2025-09-10 02:28:42.280435", "step": 2601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:42.309506", "step": 2601, "epoch": 2 }, { "type": "loss", "content": 0.006789051927626133, "timestamp": "2025-09-10 02:28:42.311319", "step": 2602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.340236", "step": 2602, "epoch": 2 }, { "type": "loss", "content": 0.013294040225446224, "timestamp": "2025-09-10 02:28:42.342554", "step": 2603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:42.374724", "step": 2603, "epoch": 2 }, { "type": "loss", "content": 0.01670178398489952, "timestamp": "2025-09-10 02:28:42.398146", "step": 2604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:42.426579", "step": 2604, "epoch": 2 }, { "type": "loss", "content": 0.04589006304740906, "timestamp": "2025-09-10 02:28:42.428103", "step": 2605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.461572", "step": 2605, "epoch": 2 }, { "type": "loss", "content": 0.005605275277048349, "timestamp": "2025-09-10 02:28:42.467525", "step": 2606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.504863", "step": 2606, "epoch": 2 }, { "type": "loss", "content": 0.015701550990343094, "timestamp": "2025-09-10 02:28:42.507084", "step": 2607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:42.540207", "step": 2607, "epoch": 2 }, { "type": "loss", "content": 0.0018044470343738794, "timestamp": "2025-09-10 02:28:42.563739", "step": 2608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.592598", "step": 2608, "epoch": 2 }, { "type": "loss", "content": 0.033574432134628296, "timestamp": "2025-09-10 02:28:42.594103", "step": 2609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.622566", "step": 2609, "epoch": 2 }, { "type": "loss", "content": 0.001270159031264484, "timestamp": "2025-09-10 02:28:42.624089", "step": 2610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.652470", "step": 2610, "epoch": 2 }, { "type": "loss", "content": 0.012809211388230324, "timestamp": "2025-09-10 02:28:42.654135", "step": 2611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.683895", "step": 2611, "epoch": 2 }, { "type": "loss", "content": 0.0007309060310944915, "timestamp": "2025-09-10 02:28:42.713356", "step": 2612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.746101", "step": 2612, "epoch": 2 }, { "type": "loss", "content": 0.010813341476023197, "timestamp": "2025-09-10 02:28:42.748359", "step": 2613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.777580", "step": 2613, "epoch": 2 }, { "type": "loss", "content": 0.0004470552667044103, "timestamp": "2025-09-10 02:28:42.783260", "step": 2614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.815170", "step": 2614, "epoch": 2 }, { "type": "loss", "content": 0.0004041774955112487, "timestamp": "2025-09-10 02:28:42.818957", "step": 2615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.848473", "step": 2615, "epoch": 2 }, { "type": "loss", "content": 0.008302075788378716, "timestamp": "2025-09-10 02:28:42.871884", "step": 2616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:42.902214", "step": 2616, "epoch": 2 }, { "type": "loss", "content": 0.0021110589150339365, "timestamp": "2025-09-10 02:28:42.905727", "step": 2617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.934877", "step": 2617, "epoch": 2 }, { "type": "loss", "content": 0.019456753507256508, "timestamp": "2025-09-10 02:28:42.936953", "step": 2618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:42.975995", "step": 2618, "epoch": 2 }, { "type": "loss", "content": 0.006132456008344889, "timestamp": "2025-09-10 02:28:42.978973", "step": 2619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:43.009909", "step": 2619, "epoch": 2 }, { "type": "loss", "content": 0.024464385583996773, "timestamp": "2025-09-10 02:28:43.036750", "step": 2620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.076475", "step": 2620, "epoch": 2 }, { "type": "loss", "content": 0.017063161358237267, "timestamp": "2025-09-10 02:28:43.078983", "step": 2621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.111287", "step": 2621, "epoch": 2 }, { "type": "loss", "content": 0.002038109814748168, "timestamp": "2025-09-10 02:28:43.116146", "step": 2622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.145537", "step": 2622, "epoch": 2 }, { "type": "loss", "content": 0.0004370710230432451, "timestamp": "2025-09-10 02:28:43.148788", "step": 2623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.183874", "step": 2623, "epoch": 2 }, { "type": "loss", "content": 0.039566520601511, "timestamp": "2025-09-10 02:28:43.210745", "step": 2624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.243277", "step": 2624, "epoch": 2 }, { "type": "loss", "content": 0.0052605862729251385, "timestamp": "2025-09-10 02:28:43.245879", "step": 2625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.280556", "step": 2625, "epoch": 2 }, { "type": "loss", "content": 0.008730943314731121, "timestamp": "2025-09-10 02:28:43.282263", "step": 2626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.312425", "step": 2626, "epoch": 2 }, { "type": "loss", "content": 0.040866997092962265, "timestamp": "2025-09-10 02:28:43.318297", "step": 2627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.358041", "step": 2627, "epoch": 2 }, { "type": "loss", "content": 0.0032515223138034344, "timestamp": "2025-09-10 02:28:43.386299", "step": 2628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.420448", "step": 2628, "epoch": 2 }, { "type": "loss", "content": 0.04348122701048851, "timestamp": "2025-09-10 02:28:43.426300", "step": 2629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.463200", "step": 2629, "epoch": 2 }, { "type": "loss", "content": 0.014161914587020874, "timestamp": "2025-09-10 02:28:43.465157", "step": 2630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.495635", "step": 2630, "epoch": 2 }, { "type": "loss", "content": 0.023532714694738388, "timestamp": "2025-09-10 02:28:43.497661", "step": 2631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.527643", "step": 2631, "epoch": 2 }, { "type": "loss", "content": 0.058165520429611206, "timestamp": "2025-09-10 02:28:43.551057", "step": 2632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:43.581064", "step": 2632, "epoch": 2 }, { "type": "loss", "content": 0.006530240178108215, "timestamp": "2025-09-10 02:28:43.582932", "step": 2633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.613070", "step": 2633, "epoch": 2 }, { "type": "loss", "content": 0.03045285865664482, "timestamp": "2025-09-10 02:28:43.614841", "step": 2634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.646399", "step": 2634, "epoch": 2 }, { "type": "loss", "content": 0.06831801682710648, "timestamp": "2025-09-10 02:28:43.659248", "step": 2635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.694918", "step": 2635, "epoch": 2 }, { "type": "loss", "content": 0.05904409661889076, "timestamp": "2025-09-10 02:28:43.720778", "step": 2636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.761703", "step": 2636, "epoch": 2 }, { "type": "loss", "content": 0.00957201886922121, "timestamp": "2025-09-10 02:28:43.765723", "step": 2637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:43.801316", "step": 2637, "epoch": 2 }, { "type": "loss", "content": 0.01836211048066616, "timestamp": "2025-09-10 02:28:43.803463", "step": 2638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.833454", "step": 2638, "epoch": 2 }, { "type": "loss", "content": 0.028037531301379204, "timestamp": "2025-09-10 02:28:43.839860", "step": 2639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.871700", "step": 2639, "epoch": 2 }, { "type": "loss", "content": 0.007020160555839539, "timestamp": "2025-09-10 02:28:43.896266", "step": 2640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.928024", "step": 2640, "epoch": 2 }, { "type": "loss", "content": 0.0023041670210659504, "timestamp": "2025-09-10 02:28:43.930707", "step": 2641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.964816", "step": 2641, "epoch": 2 }, { "type": "loss", "content": 0.012306435965001583, "timestamp": "2025-09-10 02:28:43.966575", "step": 2642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:43.999362", "step": 2642, "epoch": 2 }, { "type": "loss", "content": 0.023700138553977013, "timestamp": "2025-09-10 02:28:44.001229", "step": 2643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.030000", "step": 2643, "epoch": 2 }, { "type": "loss", "content": 0.006072094198316336, "timestamp": "2025-09-10 02:28:44.057684", "step": 2644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.089169", "step": 2644, "epoch": 2 }, { "type": "loss", "content": 0.0027935353573411703, "timestamp": "2025-09-10 02:28:44.091396", "step": 2645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.120788", "step": 2645, "epoch": 2 }, { "type": "loss", "content": 0.006508468184620142, "timestamp": "2025-09-10 02:28:44.122809", "step": 2646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.154913", "step": 2646, "epoch": 2 }, { "type": "loss", "content": 0.04970017448067665, "timestamp": "2025-09-10 02:28:44.156851", "step": 2647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.186096", "step": 2647, "epoch": 2 }, { "type": "loss", "content": 0.012843841686844826, "timestamp": "2025-09-10 02:28:44.211472", "step": 2648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:44.244124", "step": 2648, "epoch": 2 }, { "type": "loss", "content": 0.015078941360116005, "timestamp": "2025-09-10 02:28:44.248137", "step": 2649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.278048", "step": 2649, "epoch": 2 }, { "type": "loss", "content": 0.07857070863246918, "timestamp": "2025-09-10 02:28:44.279840", "step": 2650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:44.308643", "step": 2650, "epoch": 2 }, { "type": "loss", "content": 0.05733145400881767, "timestamp": "2025-09-10 02:28:44.318771", "step": 2651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.357872", "step": 2651, "epoch": 2 }, { "type": "loss", "content": 0.005751179065555334, "timestamp": "2025-09-10 02:28:44.381326", "step": 2652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.410397", "step": 2652, "epoch": 2 }, { "type": "loss", "content": 0.005302275065332651, "timestamp": "2025-09-10 02:28:44.412235", "step": 2653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.441003", "step": 2653, "epoch": 2 }, { "type": "loss", "content": 0.012114384211599827, "timestamp": "2025-09-10 02:28:44.442897", "step": 2654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.471813", "step": 2654, "epoch": 2 }, { "type": "loss", "content": 0.030837317928671837, "timestamp": "2025-09-10 02:28:44.474080", "step": 2655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.503113", "step": 2655, "epoch": 2 }, { "type": "loss", "content": 0.08814512938261032, "timestamp": "2025-09-10 02:28:44.526592", "step": 2656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:44.560619", "step": 2656, "epoch": 2 }, { "type": "loss", "content": 0.020036103203892708, "timestamp": "2025-09-10 02:28:44.566262", "step": 2657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.595571", "step": 2657, "epoch": 2 }, { "type": "loss", "content": 0.0062383851036429405, "timestamp": "2025-09-10 02:28:44.603606", "step": 2658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.634411", "step": 2658, "epoch": 2 }, { "type": "loss", "content": 0.01709858886897564, "timestamp": "2025-09-10 02:28:44.639660", "step": 2659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.670717", "step": 2659, "epoch": 2 }, { "type": "loss", "content": 0.017863783985376358, "timestamp": "2025-09-10 02:28:44.694853", "step": 2660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:44.728640", "step": 2660, "epoch": 2 }, { "type": "loss", "content": 0.07673000544309616, "timestamp": "2025-09-10 02:28:44.730596", "step": 2661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.759636", "step": 2661, "epoch": 2 }, { "type": "loss", "content": 0.007981250993907452, "timestamp": "2025-09-10 02:28:44.764247", "step": 2662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:44.799874", "step": 2662, "epoch": 2 }, { "type": "loss", "content": 0.05993294715881348, "timestamp": "2025-09-10 02:28:44.801504", "step": 2663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.830696", "step": 2663, "epoch": 2 }, { "type": "loss", "content": 0.03284836187958717, "timestamp": "2025-09-10 02:28:44.854047", "step": 2664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:44.892058", "step": 2664, "epoch": 2 }, { "type": "loss", "content": 0.011232273653149605, "timestamp": "2025-09-10 02:28:44.893793", "step": 2665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.927935", "step": 2665, "epoch": 2 }, { "type": "loss", "content": 0.03330313041806221, "timestamp": "2025-09-10 02:28:44.930654", "step": 2666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:44.969140", "step": 2666, "epoch": 2 }, { "type": "loss", "content": 0.0025289016775786877, "timestamp": "2025-09-10 02:28:44.972991", "step": 2667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:45.005680", "step": 2667, "epoch": 2 }, { "type": "loss", "content": 0.018535206094384193, "timestamp": "2025-09-10 02:28:45.028991", "step": 2668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.082702", "step": 2668, "epoch": 2 }, { "type": "loss", "content": 0.025767965242266655, "timestamp": "2025-09-10 02:28:45.086962", "step": 2669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.116305", "step": 2669, "epoch": 2 }, { "type": "loss", "content": 0.01067658793181181, "timestamp": "2025-09-10 02:28:45.118166", "step": 2670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:45.147155", "step": 2670, "epoch": 2 }, { "type": "loss", "content": 0.02955467812716961, "timestamp": "2025-09-10 02:28:45.148871", "step": 2671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.177530", "step": 2671, "epoch": 2 }, { "type": "loss", "content": 0.0016258393879979849, "timestamp": "2025-09-10 02:28:45.200842", "step": 2672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:45.232223", "step": 2672, "epoch": 2 }, { "type": "loss", "content": 0.009552685543894768, "timestamp": "2025-09-10 02:28:45.234110", "step": 2673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.262919", "step": 2673, "epoch": 2 }, { "type": "loss", "content": 0.010288403369486332, "timestamp": "2025-09-10 02:28:45.264812", "step": 2674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.295388", "step": 2674, "epoch": 2 }, { "type": "loss", "content": 0.05034352466464043, "timestamp": "2025-09-10 02:28:45.297408", "step": 2675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.326872", "step": 2675, "epoch": 2 }, { "type": "loss", "content": 0.003926003817468882, "timestamp": "2025-09-10 02:28:45.350413", "step": 2676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.380821", "step": 2676, "epoch": 2 }, { "type": "loss", "content": 0.0008642165921628475, "timestamp": "2025-09-10 02:28:45.382766", "step": 2677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.415963", "step": 2677, "epoch": 2 }, { "type": "loss", "content": 0.029930176213383675, "timestamp": "2025-09-10 02:28:45.417815", "step": 2678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:45.447174", "step": 2678, "epoch": 2 }, { "type": "loss", "content": 0.01474265567958355, "timestamp": "2025-09-10 02:28:45.448930", "step": 2679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.478405", "step": 2679, "epoch": 2 }, { "type": "loss", "content": 0.012907499447464943, "timestamp": "2025-09-10 02:28:45.501971", "step": 2680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.534371", "step": 2680, "epoch": 2 }, { "type": "loss", "content": 0.039185430854558945, "timestamp": "2025-09-10 02:28:45.540662", "step": 2681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.577291", "step": 2681, "epoch": 2 }, { "type": "loss", "content": 0.02835225686430931, "timestamp": "2025-09-10 02:28:45.579395", "step": 2682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.609186", "step": 2682, "epoch": 2 }, { "type": "loss", "content": 0.02721690945327282, "timestamp": "2025-09-10 02:28:45.612641", "step": 2683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:45.645024", "step": 2683, "epoch": 2 }, { "type": "loss", "content": 0.03995153307914734, "timestamp": "2025-09-10 02:28:45.669533", "step": 2684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.698932", "step": 2684, "epoch": 2 }, { "type": "loss", "content": 0.028712604194879532, "timestamp": "2025-09-10 02:28:45.701971", "step": 2685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.736733", "step": 2685, "epoch": 2 }, { "type": "loss", "content": 0.018331315368413925, "timestamp": "2025-09-10 02:28:45.738577", "step": 2686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:45.767531", "step": 2686, "epoch": 2 }, { "type": "loss", "content": 0.03021402657032013, "timestamp": "2025-09-10 02:28:45.770332", "step": 2687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.799580", "step": 2687, "epoch": 2 }, { "type": "loss", "content": 0.053654056042432785, "timestamp": "2025-09-10 02:28:45.823051", "step": 2688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:45.865111", "step": 2688, "epoch": 2 }, { "type": "loss", "content": 0.06579221040010452, "timestamp": "2025-09-10 02:28:45.871938", "step": 2689, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.902535", "step": 2689, "epoch": 2 }, { "type": "loss", "content": 0.004453813191503286, "timestamp": "2025-09-10 02:28:45.904700", "step": 2690, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.933807", "step": 2690, "epoch": 2 }, { "type": "loss", "content": 0.027801888063549995, "timestamp": "2025-09-10 02:28:45.937495", "step": 2691, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:45.968546", "step": 2691, "epoch": 2 }, { "type": "loss", "content": 0.026753781363368034, "timestamp": "2025-09-10 02:28:45.991887", "step": 2692, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:46.020984", "step": 2692, "epoch": 2 }, { "type": "loss", "content": 0.013489528559148312, "timestamp": "2025-09-10 02:28:46.024261", "step": 2693, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.055378", "step": 2693, "epoch": 2 }, { "type": "loss", "content": 0.01014415267854929, "timestamp": "2025-09-10 02:28:46.057229", "step": 2694, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.086052", "step": 2694, "epoch": 2 }, { "type": "loss", "content": 0.01844717375934124, "timestamp": "2025-09-10 02:28:46.087864", "step": 2695, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.117600", "step": 2695, "epoch": 2 }, { "type": "loss", "content": 0.02382180653512478, "timestamp": "2025-09-10 02:28:46.140928", "step": 2696, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.176893", "step": 2696, "epoch": 2 }, { "type": "loss", "content": 0.05853342264890671, "timestamp": "2025-09-10 02:28:46.178790", "step": 2697, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.210009", "step": 2697, "epoch": 2 }, { "type": "loss", "content": 0.014555533416569233, "timestamp": "2025-09-10 02:28:46.214423", "step": 2698, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.245417", "step": 2698, "epoch": 2 }, { "type": "loss", "content": 0.027585254982113838, "timestamp": "2025-09-10 02:28:46.249265", "step": 2699, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.281344", "step": 2699, "epoch": 2 }, { "type": "loss", "content": 0.02432733215391636, "timestamp": "2025-09-10 02:28:46.304864", "step": 2700, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.336204", "step": 2700, "epoch": 2 }, { "type": "loss", "content": 0.018436763435602188, "timestamp": "2025-09-10 02:28:46.339532", "step": 2701, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:46.381338", "step": 2701, "epoch": 2 }, { "type": "loss", "content": 0.005758058279752731, "timestamp": "2025-09-10 02:28:46.383259", "step": 2702, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.412278", "step": 2702, "epoch": 2 }, { "type": "loss", "content": 0.018294544890522957, "timestamp": "2025-09-10 02:28:46.415349", "step": 2703, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.451384", "step": 2703, "epoch": 2 }, { "type": "loss", "content": 0.003365014912560582, "timestamp": "2025-09-10 02:28:46.475726", "step": 2704, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.510522", "step": 2704, "epoch": 2 }, { "type": "loss", "content": 0.025826290249824524, "timestamp": "2025-09-10 02:28:46.519224", "step": 2705, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.550744", "step": 2705, "epoch": 2 }, { "type": "loss", "content": 0.04405367746949196, "timestamp": "2025-09-10 02:28:46.552855", "step": 2706, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.581725", "step": 2706, "epoch": 2 }, { "type": "loss", "content": 0.013567561283707619, "timestamp": "2025-09-10 02:28:46.583336", "step": 2707, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.618383", "step": 2707, "epoch": 2 }, { "type": "loss", "content": 0.005779425147920847, "timestamp": "2025-09-10 02:28:46.641726", "step": 2708, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:46.670826", "step": 2708, "epoch": 2 }, { "type": "loss", "content": 0.007377541624009609, "timestamp": "2025-09-10 02:28:46.676004", "step": 2709, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.706217", "step": 2709, "epoch": 2 }, { "type": "loss", "content": 0.019734172150492668, "timestamp": "2025-09-10 02:28:46.710656", "step": 2710, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.742635", "step": 2710, "epoch": 2 }, { "type": "loss", "content": 0.03871343284845352, "timestamp": "2025-09-10 02:28:46.747148", "step": 2711, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:46.783716", "step": 2711, "epoch": 2 }, { "type": "loss", "content": 0.00602757278829813, "timestamp": "2025-09-10 02:28:46.807334", "step": 2712, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:46.839400", "step": 2712, "epoch": 2 }, { "type": "loss", "content": 0.07382923364639282, "timestamp": "2025-09-10 02:28:46.841522", "step": 2713, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.871126", "step": 2713, "epoch": 2 }, { "type": "loss", "content": 0.021069372072815895, "timestamp": "2025-09-10 02:28:46.875169", "step": 2714, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:46.910015", "step": 2714, "epoch": 2 }, { "type": "loss", "content": 0.0741237998008728, "timestamp": "2025-09-10 02:28:46.911900", "step": 2715, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:46.941995", "step": 2715, "epoch": 2 }, { "type": "loss", "content": 0.014232861809432507, "timestamp": "2025-09-10 02:28:46.966506", "step": 2716, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.009606", "step": 2716, "epoch": 2 }, { "type": "loss", "content": 0.00317375804297626, "timestamp": "2025-09-10 02:28:47.014529", "step": 2717, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.047077", "step": 2717, "epoch": 2 }, { "type": "loss", "content": 0.06315583735704422, "timestamp": "2025-09-10 02:28:47.055060", "step": 2718, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.085452", "step": 2718, "epoch": 2 }, { "type": "loss", "content": 0.02573302760720253, "timestamp": "2025-09-10 02:28:47.087579", "step": 2719, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.116638", "step": 2719, "epoch": 2 }, { "type": "loss", "content": 0.03362132981419563, "timestamp": "2025-09-10 02:28:47.140382", "step": 2720, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:47.174223", "step": 2720, "epoch": 2 }, { "type": "loss", "content": 0.029691146686673164, "timestamp": "2025-09-10 02:28:47.176187", "step": 2721, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.207163", "step": 2721, "epoch": 2 }, { "type": "loss", "content": 0.00339910970069468, "timestamp": "2025-09-10 02:28:47.212551", "step": 2722, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.246462", "step": 2722, "epoch": 2 }, { "type": "loss", "content": 0.0015478292480111122, "timestamp": "2025-09-10 02:28:47.248406", "step": 2723, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.277625", "step": 2723, "epoch": 2 }, { "type": "loss", "content": 0.0221834909170866, "timestamp": "2025-09-10 02:28:47.301308", "step": 2724, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.330698", "step": 2724, "epoch": 2 }, { "type": "loss", "content": 0.025360483676195145, "timestamp": "2025-09-10 02:28:47.332558", "step": 2725, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.361448", "step": 2725, "epoch": 2 }, { "type": "loss", "content": 0.026857640594244003, "timestamp": "2025-09-10 02:28:47.363223", "step": 2726, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.392422", "step": 2726, "epoch": 2 }, { "type": "loss", "content": 0.011470100842416286, "timestamp": "2025-09-10 02:28:47.394904", "step": 2727, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.424160", "step": 2727, "epoch": 2 }, { "type": "loss", "content": 0.00516451196745038, "timestamp": "2025-09-10 02:28:47.448964", "step": 2728, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.477639", "step": 2728, "epoch": 2 }, { "type": "loss", "content": 0.016428111121058464, "timestamp": "2025-09-10 02:28:47.479460", "step": 2729, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.508399", "step": 2729, "epoch": 2 }, { "type": "loss", "content": 0.04734635725617409, "timestamp": "2025-09-10 02:28:47.513125", "step": 2730, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:47.549549", "step": 2730, "epoch": 2 }, { "type": "loss", "content": 0.02643342688679695, "timestamp": "2025-09-10 02:28:47.554239", "step": 2731, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.592227", "step": 2731, "epoch": 2 }, { "type": "loss", "content": 0.02251642383635044, "timestamp": "2025-09-10 02:28:47.615449", "step": 2732, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.646933", "step": 2732, "epoch": 2 }, { "type": "loss", "content": 0.014357469975948334, "timestamp": "2025-09-10 02:28:47.655428", "step": 2733, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.686078", "step": 2733, "epoch": 2 }, { "type": "loss", "content": 0.0158222708851099, "timestamp": "2025-09-10 02:28:47.688788", "step": 2734, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.718221", "step": 2734, "epoch": 2 }, { "type": "loss", "content": 0.0061955139972269535, "timestamp": "2025-09-10 02:28:47.720457", "step": 2735, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:47.749479", "step": 2735, "epoch": 2 }, { "type": "loss", "content": 0.006888777483254671, "timestamp": "2025-09-10 02:28:47.773040", "step": 2736, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:28:49.698666", "step": 2736, "epoch": 2 }, { "type": "pplx", "content": 2556436.4143981044, "timestamp": "2025-09-10 02:28:49.700535", "step": 2736, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.728274", "step": 2736, "epoch": 2 }, { "type": "loss", "content": 0.0028547674883157015, "timestamp": "2025-09-10 02:28:49.730163", "step": 2737, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.759271", "step": 2737, "epoch": 2 }, { "type": "loss", "content": 0.013122498989105225, "timestamp": "2025-09-10 02:28:49.761114", "step": 2738, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.790063", "step": 2738, "epoch": 2 }, { "type": "loss", "content": 0.02464980073273182, "timestamp": "2025-09-10 02:28:49.791883", "step": 2739, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.820976", "step": 2739, "epoch": 2 }, { "type": "loss", "content": 0.004556159023195505, "timestamp": "2025-09-10 02:28:49.844478", "step": 2740, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.873459", "step": 2740, "epoch": 2 }, { "type": "loss", "content": 0.04443352296948433, "timestamp": "2025-09-10 02:28:49.875229", "step": 2741, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.904687", "step": 2741, "epoch": 2 }, { "type": "loss", "content": 0.067392498254776, "timestamp": "2025-09-10 02:28:49.906918", "step": 2742, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.936338", "step": 2742, "epoch": 2 }, { "type": "loss", "content": 0.003829964669421315, "timestamp": "2025-09-10 02:28:49.938339", "step": 2743, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:49.967572", "step": 2743, "epoch": 2 }, { "type": "loss", "content": 0.0066711571998894215, "timestamp": "2025-09-10 02:28:49.991227", "step": 2744, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:50.020217", "step": 2744, "epoch": 2 }, { "type": "loss", "content": 0.01632828451693058, "timestamp": "2025-09-10 02:28:50.022102", "step": 2745, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.051172", "step": 2745, "epoch": 2 }, { "type": "loss", "content": 0.02845684252679348, "timestamp": "2025-09-10 02:28:50.053060", "step": 2746, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:50.082236", "step": 2746, "epoch": 2 }, { "type": "loss", "content": 0.026504751294851303, "timestamp": "2025-09-10 02:28:50.084024", "step": 2747, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.112882", "step": 2747, "epoch": 2 }, { "type": "loss", "content": 0.006421736441552639, "timestamp": "2025-09-10 02:28:50.136319", "step": 2748, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.165335", "step": 2748, "epoch": 2 }, { "type": "loss", "content": 0.024044236168265343, "timestamp": "2025-09-10 02:28:50.167389", "step": 2749, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:50.196717", "step": 2749, "epoch": 2 }, { "type": "loss", "content": 0.0037590719293802977, "timestamp": "2025-09-10 02:28:50.198603", "step": 2750, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.228009", "step": 2750, "epoch": 2 }, { "type": "loss", "content": 0.007875211536884308, "timestamp": "2025-09-10 02:28:50.230193", "step": 2751, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.259813", "step": 2751, "epoch": 2 }, { "type": "loss", "content": 0.01248131226748228, "timestamp": "2025-09-10 02:28:50.282915", "step": 2752, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:50.312401", "step": 2752, "epoch": 2 }, { "type": "loss", "content": 0.013336045667529106, "timestamp": "2025-09-10 02:28:50.314253", "step": 2753, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.343634", "step": 2753, "epoch": 2 }, { "type": "loss", "content": 0.03355873376131058, "timestamp": "2025-09-10 02:28:50.345549", "step": 2754, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.374450", "step": 2754, "epoch": 2 }, { "type": "loss", "content": 0.004387472756206989, "timestamp": "2025-09-10 02:28:50.376202", "step": 2755, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.404920", "step": 2755, "epoch": 2 }, { "type": "loss", "content": 0.0051027326844632626, "timestamp": "2025-09-10 02:28:50.428117", "step": 2756, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.457182", "step": 2756, "epoch": 2 }, { "type": "loss", "content": 0.006544017698615789, "timestamp": "2025-09-10 02:28:50.459200", "step": 2757, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:50.490368", "step": 2757, "epoch": 2 }, { "type": "loss", "content": 0.011391127482056618, "timestamp": "2025-09-10 02:28:50.492471", "step": 2758, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:50.521862", "step": 2758, "epoch": 2 }, { "type": "loss", "content": 0.0009497334249317646, "timestamp": "2025-09-10 02:28:50.523665", "step": 2759, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.552692", "step": 2759, "epoch": 2 }, { "type": "loss", "content": 0.0008985889726318419, "timestamp": "2025-09-10 02:28:50.576388", "step": 2760, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:50.605634", "step": 2760, "epoch": 2 }, { "type": "loss", "content": 0.04270598292350769, "timestamp": "2025-09-10 02:28:50.607464", "step": 2761, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.636890", "step": 2761, "epoch": 2 }, { "type": "loss", "content": 0.053190361708402634, "timestamp": "2025-09-10 02:28:50.638673", "step": 2762, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.667640", "step": 2762, "epoch": 2 }, { "type": "loss", "content": 0.01089306827634573, "timestamp": "2025-09-10 02:28:50.669449", "step": 2763, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.699134", "step": 2763, "epoch": 2 }, { "type": "loss", "content": 0.008394693024456501, "timestamp": "2025-09-10 02:28:50.722548", "step": 2764, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.751523", "step": 2764, "epoch": 2 }, { "type": "loss", "content": 0.06284545361995697, "timestamp": "2025-09-10 02:28:50.753458", "step": 2765, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.782552", "step": 2765, "epoch": 2 }, { "type": "loss", "content": 0.03257061913609505, "timestamp": "2025-09-10 02:28:50.784475", "step": 2766, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.813339", "step": 2766, "epoch": 2 }, { "type": "loss", "content": 0.0028790233191102743, "timestamp": "2025-09-10 02:28:50.815136", "step": 2767, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:50.844180", "step": 2767, "epoch": 2 }, { "type": "loss", "content": 0.00551459938287735, "timestamp": "2025-09-10 02:28:50.867438", "step": 2768, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.896645", "step": 2768, "epoch": 2 }, { "type": "loss", "content": 0.001208897796459496, "timestamp": "2025-09-10 02:28:50.898545", "step": 2769, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.928898", "step": 2769, "epoch": 2 }, { "type": "loss", "content": 0.03900022432208061, "timestamp": "2025-09-10 02:28:50.930683", "step": 2770, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.959635", "step": 2770, "epoch": 2 }, { "type": "loss", "content": 0.03883006051182747, "timestamp": "2025-09-10 02:28:50.961633", "step": 2771, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:50.991236", "step": 2771, "epoch": 2 }, { "type": "loss", "content": 0.03494889289140701, "timestamp": "2025-09-10 02:28:51.014827", "step": 2772, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.044028", "step": 2772, "epoch": 2 }, { "type": "loss", "content": 0.05873110517859459, "timestamp": "2025-09-10 02:28:51.045937", "step": 2773, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.075609", "step": 2773, "epoch": 2 }, { "type": "loss", "content": 0.0463673397898674, "timestamp": "2025-09-10 02:28:51.077487", "step": 2774, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.106486", "step": 2774, "epoch": 2 }, { "type": "loss", "content": 0.034762509167194366, "timestamp": "2025-09-10 02:28:51.108366", "step": 2775, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:51.137704", "step": 2775, "epoch": 2 }, { "type": "loss", "content": 0.0016691697528585792, "timestamp": "2025-09-10 02:28:51.160974", "step": 2776, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.193222", "step": 2776, "epoch": 2 }, { "type": "loss", "content": 0.0006479284493252635, "timestamp": "2025-09-10 02:28:51.195165", "step": 2777, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.224121", "step": 2777, "epoch": 2 }, { "type": "loss", "content": 0.0053579783998429775, "timestamp": "2025-09-10 02:28:51.226191", "step": 2778, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.255044", "step": 2778, "epoch": 2 }, { "type": "loss", "content": 0.019901324063539505, "timestamp": "2025-09-10 02:28:51.256844", "step": 2779, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.285700", "step": 2779, "epoch": 2 }, { "type": "loss", "content": 0.010306313633918762, "timestamp": "2025-09-10 02:28:51.309289", "step": 2780, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:51.338821", "step": 2780, "epoch": 2 }, { "type": "loss", "content": 0.001360267517156899, "timestamp": "2025-09-10 02:28:51.340616", "step": 2781, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.369701", "step": 2781, "epoch": 2 }, { "type": "loss", "content": 0.00952989887446165, "timestamp": "2025-09-10 02:28:51.371445", "step": 2782, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.400921", "step": 2782, "epoch": 2 }, { "type": "loss", "content": 0.007531650364398956, "timestamp": "2025-09-10 02:28:51.402891", "step": 2783, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.432223", "step": 2783, "epoch": 2 }, { "type": "loss", "content": 0.017776241526007652, "timestamp": "2025-09-10 02:28:51.455671", "step": 2784, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.485004", "step": 2784, "epoch": 2 }, { "type": "loss", "content": 0.0023183736484497786, "timestamp": "2025-09-10 02:28:51.486796", "step": 2785, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.516060", "step": 2785, "epoch": 2 }, { "type": "loss", "content": 0.03633001446723938, "timestamp": "2025-09-10 02:28:51.518085", "step": 2786, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.547137", "step": 2786, "epoch": 2 }, { "type": "loss", "content": 0.007016249932348728, "timestamp": "2025-09-10 02:28:51.549078", "step": 2787, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.578310", "step": 2787, "epoch": 2 }, { "type": "loss", "content": 0.01564197801053524, "timestamp": "2025-09-10 02:28:51.601785", "step": 2788, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.630739", "step": 2788, "epoch": 2 }, { "type": "loss", "content": 0.011738909408450127, "timestamp": "2025-09-10 02:28:51.632499", "step": 2789, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.661477", "step": 2789, "epoch": 2 }, { "type": "loss", "content": 0.006292745471000671, "timestamp": "2025-09-10 02:28:51.663360", "step": 2790, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:51.692973", "step": 2790, "epoch": 2 }, { "type": "loss", "content": 0.004651935305446386, "timestamp": "2025-09-10 02:28:51.694819", "step": 2791, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.723863", "step": 2791, "epoch": 2 }, { "type": "loss", "content": 0.051345258951187134, "timestamp": "2025-09-10 02:28:51.747290", "step": 2792, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.776198", "step": 2792, "epoch": 2 }, { "type": "loss", "content": 0.01709223911166191, "timestamp": "2025-09-10 02:28:51.778441", "step": 2793, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.807343", "step": 2793, "epoch": 2 }, { "type": "loss", "content": 0.0029445867985486984, "timestamp": "2025-09-10 02:28:51.809237", "step": 2794, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.838439", "step": 2794, "epoch": 2 }, { "type": "loss", "content": 0.007461313623934984, "timestamp": "2025-09-10 02:28:51.840478", "step": 2795, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.869521", "step": 2795, "epoch": 2 }, { "type": "loss", "content": 0.005373707506805658, "timestamp": "2025-09-10 02:28:51.892561", "step": 2796, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.922523", "step": 2796, "epoch": 2 }, { "type": "loss", "content": 0.0008838448557071388, "timestamp": "2025-09-10 02:28:51.924271", "step": 2797, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:51.953435", "step": 2797, "epoch": 2 }, { "type": "loss", "content": 0.0061692483723163605, "timestamp": "2025-09-10 02:28:51.955428", "step": 2798, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:51.985341", "step": 2798, "epoch": 2 }, { "type": "loss", "content": 0.0168300811201334, "timestamp": "2025-09-10 02:28:51.987080", "step": 2799, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.016243", "step": 2799, "epoch": 2 }, { "type": "loss", "content": 0.0024048867635428905, "timestamp": "2025-09-10 02:28:52.039957", "step": 2800, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.069875", "step": 2800, "epoch": 2 }, { "type": "loss", "content": 0.02803078480064869, "timestamp": "2025-09-10 02:28:52.071946", "step": 2801, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.101033", "step": 2801, "epoch": 2 }, { "type": "loss", "content": 0.013601968996226788, "timestamp": "2025-09-10 02:28:52.102780", "step": 2802, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.131444", "step": 2802, "epoch": 2 }, { "type": "loss", "content": 0.004167321603745222, "timestamp": "2025-09-10 02:28:52.133360", "step": 2803, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.162566", "step": 2803, "epoch": 2 }, { "type": "loss", "content": 0.0028820219449698925, "timestamp": "2025-09-10 02:28:52.190505", "step": 2804, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:52.220204", "step": 2804, "epoch": 2 }, { "type": "loss", "content": 0.01132802851498127, "timestamp": "2025-09-10 02:28:52.222117", "step": 2805, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.253631", "step": 2805, "epoch": 2 }, { "type": "loss", "content": 0.027890155091881752, "timestamp": "2025-09-10 02:28:52.259984", "step": 2806, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.292024", "step": 2806, "epoch": 2 }, { "type": "loss", "content": 0.03422095999121666, "timestamp": "2025-09-10 02:28:52.293826", "step": 2807, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.322801", "step": 2807, "epoch": 2 }, { "type": "loss", "content": 0.006916854064911604, "timestamp": "2025-09-10 02:28:52.346028", "step": 2808, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.374833", "step": 2808, "epoch": 2 }, { "type": "loss", "content": 0.001855450333096087, "timestamp": "2025-09-10 02:28:52.376856", "step": 2809, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.420502", "step": 2809, "epoch": 2 }, { "type": "loss", "content": 0.012419261038303375, "timestamp": "2025-09-10 02:28:52.422856", "step": 2810, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.463506", "step": 2810, "epoch": 2 }, { "type": "loss", "content": 0.00299721397459507, "timestamp": "2025-09-10 02:28:52.465581", "step": 2811, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.497599", "step": 2811, "epoch": 2 }, { "type": "loss", "content": 0.005516386590898037, "timestamp": "2025-09-10 02:28:52.520804", "step": 2812, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.560451", "step": 2812, "epoch": 2 }, { "type": "loss", "content": 0.0005141026340425014, "timestamp": "2025-09-10 02:28:52.562428", "step": 2813, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.592480", "step": 2813, "epoch": 2 }, { "type": "loss", "content": 0.008127505891025066, "timestamp": "2025-09-10 02:28:52.594323", "step": 2814, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.623636", "step": 2814, "epoch": 2 }, { "type": "loss", "content": 0.013286756351590157, "timestamp": "2025-09-10 02:28:52.625824", "step": 2815, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.670341", "step": 2815, "epoch": 2 }, { "type": "loss", "content": 0.022446798160672188, "timestamp": "2025-09-10 02:28:52.694122", "step": 2816, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.723505", "step": 2816, "epoch": 2 }, { "type": "loss", "content": 0.001313581014983356, "timestamp": "2025-09-10 02:28:52.726877", "step": 2817, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.761927", "step": 2817, "epoch": 2 }, { "type": "loss", "content": 0.02478764019906521, "timestamp": "2025-09-10 02:28:52.763910", "step": 2818, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.792634", "step": 2818, "epoch": 2 }, { "type": "loss", "content": 0.006905491929501295, "timestamp": "2025-09-10 02:28:52.794523", "step": 2819, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.823582", "step": 2819, "epoch": 2 }, { "type": "loss", "content": 0.00363927218131721, "timestamp": "2025-09-10 02:28:52.847129", "step": 2820, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:52.876368", "step": 2820, "epoch": 2 }, { "type": "loss", "content": 0.03496801480650902, "timestamp": "2025-09-10 02:28:52.878756", "step": 2821, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.907614", "step": 2821, "epoch": 2 }, { "type": "loss", "content": 0.0012525822967290878, "timestamp": "2025-09-10 02:28:52.912313", "step": 2822, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:52.944580", "step": 2822, "epoch": 2 }, { "type": "loss", "content": 0.018051186576485634, "timestamp": "2025-09-10 02:28:52.946490", "step": 2823, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:52.975382", "step": 2823, "epoch": 2 }, { "type": "loss", "content": 0.03819189593195915, "timestamp": "2025-09-10 02:28:52.998738", "step": 2824, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:53.027724", "step": 2824, "epoch": 2 }, { "type": "loss", "content": 0.018002361059188843, "timestamp": "2025-09-10 02:28:53.029454", "step": 2825, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:53.058133", "step": 2825, "epoch": 2 }, { "type": "loss", "content": 0.051839083433151245, "timestamp": "2025-09-10 02:28:53.063182", "step": 2826, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.093877", "step": 2826, "epoch": 2 }, { "type": "loss", "content": 0.08625908941030502, "timestamp": "2025-09-10 02:28:53.098347", "step": 2827, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.129019", "step": 2827, "epoch": 2 }, { "type": "loss", "content": 0.005705358926206827, "timestamp": "2025-09-10 02:28:53.152525", "step": 2828, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:53.181545", "step": 2828, "epoch": 2 }, { "type": "loss", "content": 0.02136576734483242, "timestamp": "2025-09-10 02:28:53.184246", "step": 2829, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:53.214375", "step": 2829, "epoch": 2 }, { "type": "loss", "content": 0.0008014339837245643, "timestamp": "2025-09-10 02:28:53.219289", "step": 2830, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:53.249975", "step": 2830, "epoch": 2 }, { "type": "loss", "content": 0.024044450372457504, "timestamp": "2025-09-10 02:28:53.251842", "step": 2831, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.280824", "step": 2831, "epoch": 2 }, { "type": "loss", "content": 0.0004118950746487826, "timestamp": "2025-09-10 02:28:53.304796", "step": 2832, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.333454", "step": 2832, "epoch": 2 }, { "type": "loss", "content": 0.03739207237958908, "timestamp": "2025-09-10 02:28:53.335852", "step": 2833, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.365560", "step": 2833, "epoch": 2 }, { "type": "loss", "content": 0.004224942997097969, "timestamp": "2025-09-10 02:28:53.367454", "step": 2834, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.396231", "step": 2834, "epoch": 2 }, { "type": "loss", "content": 0.07634751498699188, "timestamp": "2025-09-10 02:28:53.398043", "step": 2835, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.427049", "step": 2835, "epoch": 2 }, { "type": "loss", "content": 0.0822276696562767, "timestamp": "2025-09-10 02:28:53.450585", "step": 2836, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.479761", "step": 2836, "epoch": 2 }, { "type": "loss", "content": 0.002085910877212882, "timestamp": "2025-09-10 02:28:53.481840", "step": 2837, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.510608", "step": 2837, "epoch": 2 }, { "type": "loss", "content": 0.0021981713362038136, "timestamp": "2025-09-10 02:28:53.512503", "step": 2838, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.541146", "step": 2838, "epoch": 2 }, { "type": "loss", "content": 0.01698652096092701, "timestamp": "2025-09-10 02:28:53.542975", "step": 2839, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.571811", "step": 2839, "epoch": 2 }, { "type": "loss", "content": 0.03516782820224762, "timestamp": "2025-09-10 02:28:53.597059", "step": 2840, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:28:53.626348", "step": 2840, "epoch": 2 }, { "type": "loss", "content": 0.0384167842566967, "timestamp": "2025-09-10 02:28:53.628193", "step": 2841, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.657194", "step": 2841, "epoch": 2 }, { "type": "loss", "content": 0.0008761967765167356, "timestamp": "2025-09-10 02:28:53.661970", "step": 2842, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.697734", "step": 2842, "epoch": 2 }, { "type": "loss", "content": 0.00977950356900692, "timestamp": "2025-09-10 02:28:53.699883", "step": 2843, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.728962", "step": 2843, "epoch": 2 }, { "type": "loss", "content": 0.012906111776828766, "timestamp": "2025-09-10 02:28:53.752680", "step": 2844, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.786484", "step": 2844, "epoch": 2 }, { "type": "loss", "content": 0.0015678195049986243, "timestamp": "2025-09-10 02:28:53.790975", "step": 2845, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.820606", "step": 2845, "epoch": 2 }, { "type": "loss", "content": 0.032121773809194565, "timestamp": "2025-09-10 02:28:53.823031", "step": 2846, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:53.852699", "step": 2846, "epoch": 2 }, { "type": "loss", "content": 0.035404808819293976, "timestamp": "2025-09-10 02:28:53.855094", "step": 2847, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.884113", "step": 2847, "epoch": 2 }, { "type": "loss", "content": 0.004591009113937616, "timestamp": "2025-09-10 02:28:53.907504", "step": 2848, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:53.937008", "step": 2848, "epoch": 2 }, { "type": "loss", "content": 0.0016618984518572688, "timestamp": "2025-09-10 02:28:53.938758", "step": 2849, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:53.967481", "step": 2849, "epoch": 2 }, { "type": "loss", "content": 0.014219125732779503, "timestamp": "2025-09-10 02:28:53.969585", "step": 2850, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:53.998685", "step": 2850, "epoch": 2 }, { "type": "loss", "content": 0.01862996816635132, "timestamp": "2025-09-10 02:28:54.000567", "step": 2851, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.029550", "step": 2851, "epoch": 2 }, { "type": "loss", "content": 0.005646827165037394, "timestamp": "2025-09-10 02:28:54.052928", "step": 2852, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.081709", "step": 2852, "epoch": 2 }, { "type": "loss", "content": 0.025086741894483566, "timestamp": "2025-09-10 02:28:54.083550", "step": 2853, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.112364", "step": 2853, "epoch": 2 }, { "type": "loss", "content": 0.013008520938456059, "timestamp": "2025-09-10 02:28:54.114221", "step": 2854, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.142867", "step": 2854, "epoch": 2 }, { "type": "loss", "content": 0.005398153327405453, "timestamp": "2025-09-10 02:28:54.144623", "step": 2855, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.174625", "step": 2855, "epoch": 2 }, { "type": "loss", "content": 0.0349435992538929, "timestamp": "2025-09-10 02:28:54.198068", "step": 2856, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:54.227002", "step": 2856, "epoch": 2 }, { "type": "loss", "content": 0.002373448805883527, "timestamp": "2025-09-10 02:28:54.228909", "step": 2857, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:54.257850", "step": 2857, "epoch": 2 }, { "type": "loss", "content": 0.026640325784683228, "timestamp": "2025-09-10 02:28:54.259760", "step": 2858, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.288703", "step": 2858, "epoch": 2 }, { "type": "loss", "content": 0.027330685406923294, "timestamp": "2025-09-10 02:28:54.290778", "step": 2859, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.319697", "step": 2859, "epoch": 2 }, { "type": "loss", "content": 0.04181050881743431, "timestamp": "2025-09-10 02:28:54.343173", "step": 2860, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.372862", "step": 2860, "epoch": 2 }, { "type": "loss", "content": 0.0061957393772900105, "timestamp": "2025-09-10 02:28:54.374605", "step": 2861, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.403631", "step": 2861, "epoch": 2 }, { "type": "loss", "content": 0.01754625327885151, "timestamp": "2025-09-10 02:28:54.405586", "step": 2862, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:54.434636", "step": 2862, "epoch": 2 }, { "type": "loss", "content": 0.01089306827634573, "timestamp": "2025-09-10 02:28:54.436648", "step": 2863, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.465879", "step": 2863, "epoch": 2 }, { "type": "loss", "content": 0.01644907519221306, "timestamp": "2025-09-10 02:28:54.489900", "step": 2864, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.518950", "step": 2864, "epoch": 2 }, { "type": "loss", "content": 0.0014806865947321057, "timestamp": "2025-09-10 02:28:54.521039", "step": 2865, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.549860", "step": 2865, "epoch": 2 }, { "type": "loss", "content": 0.027932481840252876, "timestamp": "2025-09-10 02:28:54.552522", "step": 2866, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.581594", "step": 2866, "epoch": 2 }, { "type": "loss", "content": 0.001444231136702001, "timestamp": "2025-09-10 02:28:54.583466", "step": 2867, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.612135", "step": 2867, "epoch": 2 }, { "type": "loss", "content": 0.001635089167393744, "timestamp": "2025-09-10 02:28:54.635857", "step": 2868, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.664448", "step": 2868, "epoch": 2 }, { "type": "loss", "content": 0.06270123273134232, "timestamp": "2025-09-10 02:28:54.666257", "step": 2869, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.696875", "step": 2869, "epoch": 2 }, { "type": "loss", "content": 0.04237469285726547, "timestamp": "2025-09-10 02:28:54.698910", "step": 2870, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:54.727912", "step": 2870, "epoch": 2 }, { "type": "loss", "content": 0.0055243996903300285, "timestamp": "2025-09-10 02:28:54.729745", "step": 2871, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.758842", "step": 2871, "epoch": 2 }, { "type": "loss", "content": 0.0022270630579441786, "timestamp": "2025-09-10 02:28:54.782047", "step": 2872, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.811084", "step": 2872, "epoch": 2 }, { "type": "loss", "content": 0.015764104202389717, "timestamp": "2025-09-10 02:28:54.813173", "step": 2873, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.841961", "step": 2873, "epoch": 2 }, { "type": "loss", "content": 0.025673216208815575, "timestamp": "2025-09-10 02:28:54.844034", "step": 2874, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.873356", "step": 2874, "epoch": 2 }, { "type": "loss", "content": 0.013369431719183922, "timestamp": "2025-09-10 02:28:54.875270", "step": 2875, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.904632", "step": 2875, "epoch": 2 }, { "type": "loss", "content": 0.0023222228046506643, "timestamp": "2025-09-10 02:28:54.928025", "step": 2876, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.957186", "step": 2876, "epoch": 2 }, { "type": "loss", "content": 0.03977759927511215, "timestamp": "2025-09-10 02:28:54.959106", "step": 2877, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:54.988156", "step": 2877, "epoch": 2 }, { "type": "loss", "content": 0.007196251768618822, "timestamp": "2025-09-10 02:28:54.990040", "step": 2878, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:55.018923", "step": 2878, "epoch": 2 }, { "type": "loss", "content": 0.004890757147222757, "timestamp": "2025-09-10 02:28:55.021025", "step": 2879, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:55.050166", "step": 2879, "epoch": 2 }, { "type": "loss", "content": 0.02686142362654209, "timestamp": "2025-09-10 02:28:55.073427", "step": 2880, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:55.102314", "step": 2880, "epoch": 2 }, { "type": "loss", "content": 0.015549045987427235, "timestamp": "2025-09-10 02:28:55.104168", "step": 2881, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:55.133114", "step": 2881, "epoch": 2 }, { "type": "loss", "content": 0.04396245256066322, "timestamp": "2025-09-10 02:28:55.135004", "step": 2882, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:55.163934", "step": 2882, "epoch": 2 }, { "type": "loss", "content": 0.0018287552520632744, "timestamp": "2025-09-10 02:28:55.165901", "step": 2883, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:55.194907", "step": 2883, "epoch": 2 }, { "type": "loss", "content": 0.04077062010765076, "timestamp": "2025-09-10 02:28:55.218384", "step": 2884, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:55.247341", "step": 2884, "epoch": 2 }, { "type": "loss", "content": 0.006520784460008144, "timestamp": "2025-09-10 02:28:55.249020", "step": 2885, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:55.278048", "step": 2885, "epoch": 2 }, { "type": "loss", "content": 0.004114919807761908, "timestamp": "2025-09-10 02:28:55.280068", "step": 2886, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:55.309247", "step": 2886, "epoch": 2 }, { "type": "loss", "content": 0.004029436502605677, "timestamp": "2025-09-10 02:28:55.311090", "step": 2887, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:55.339984", "step": 2887, "epoch": 2 }, { "type": "loss", "content": 0.030637195333838463, "timestamp": "2025-09-10 02:28:55.363626", "step": 2888, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:28:57.281618", "step": 2888, "epoch": 2 }, { "type": "pplx", "content": 2362714.2441541348, "timestamp": "2025-09-10 02:28:57.283737", "step": 2888, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.311992", "step": 2888, "epoch": 2 }, { "type": "loss", "content": 0.03481084480881691, "timestamp": "2025-09-10 02:28:57.314135", "step": 2889, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.343638", "step": 2889, "epoch": 2 }, { "type": "loss", "content": 0.009121944196522236, "timestamp": "2025-09-10 02:28:57.345523", "step": 2890, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:57.374675", "step": 2890, "epoch": 2 }, { "type": "loss", "content": 0.008922411128878593, "timestamp": "2025-09-10 02:28:57.376281", "step": 2891, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.405502", "step": 2891, "epoch": 2 }, { "type": "loss", "content": 0.09262406080961227, "timestamp": "2025-09-10 02:28:57.429138", "step": 2892, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.458070", "step": 2892, "epoch": 2 }, { "type": "loss", "content": 0.004805420991033316, "timestamp": "2025-09-10 02:28:57.460015", "step": 2893, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.488646", "step": 2893, "epoch": 2 }, { "type": "loss", "content": 0.00714575732126832, "timestamp": "2025-09-10 02:28:57.490465", "step": 2894, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:57.519866", "step": 2894, "epoch": 2 }, { "type": "loss", "content": 0.014643056318163872, "timestamp": "2025-09-10 02:28:57.521833", "step": 2895, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.551982", "step": 2895, "epoch": 2 }, { "type": "loss", "content": 0.022045502439141273, "timestamp": "2025-09-10 02:28:57.575615", "step": 2896, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.604356", "step": 2896, "epoch": 2 }, { "type": "loss", "content": 0.016090938821434975, "timestamp": "2025-09-10 02:28:57.606360", "step": 2897, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.635811", "step": 2897, "epoch": 2 }, { "type": "loss", "content": 0.014781218953430653, "timestamp": "2025-09-10 02:28:57.637628", "step": 2898, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.666486", "step": 2898, "epoch": 2 }, { "type": "loss", "content": 0.016501110047101974, "timestamp": "2025-09-10 02:28:57.668419", "step": 2899, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.697893", "step": 2899, "epoch": 2 }, { "type": "loss", "content": 0.0067305476404726505, "timestamp": "2025-09-10 02:28:57.721428", "step": 2900, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.751126", "step": 2900, "epoch": 2 }, { "type": "loss", "content": 0.02997549995779991, "timestamp": "2025-09-10 02:28:57.752913", "step": 2901, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.781871", "step": 2901, "epoch": 2 }, { "type": "loss", "content": 0.03778936341404915, "timestamp": "2025-09-10 02:28:57.784038", "step": 2902, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.816766", "step": 2902, "epoch": 2 }, { "type": "loss", "content": 0.02480734884738922, "timestamp": "2025-09-10 02:28:57.818623", "step": 2903, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:57.849799", "step": 2903, "epoch": 2 }, { "type": "loss", "content": 0.017994388937950134, "timestamp": "2025-09-10 02:28:57.873118", "step": 2904, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.902255", "step": 2904, "epoch": 2 }, { "type": "loss", "content": 0.004165151156485081, "timestamp": "2025-09-10 02:28:57.904126", "step": 2905, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.933095", "step": 2905, "epoch": 2 }, { "type": "loss", "content": 0.0027392476331442595, "timestamp": "2025-09-10 02:28:57.934997", "step": 2906, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.963955", "step": 2906, "epoch": 2 }, { "type": "loss", "content": 0.023687604814767838, "timestamp": "2025-09-10 02:28:57.965830", "step": 2907, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:57.995014", "step": 2907, "epoch": 2 }, { "type": "loss", "content": 0.027018358930945396, "timestamp": "2025-09-10 02:28:58.018533", "step": 2908, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.048330", "step": 2908, "epoch": 2 }, { "type": "loss", "content": 0.007352299056947231, "timestamp": "2025-09-10 02:28:58.050530", "step": 2909, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.079268", "step": 2909, "epoch": 2 }, { "type": "loss", "content": 0.03476090729236603, "timestamp": "2025-09-10 02:28:58.081186", "step": 2910, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.110181", "step": 2910, "epoch": 2 }, { "type": "loss", "content": 0.007333045359700918, "timestamp": "2025-09-10 02:28:58.112012", "step": 2911, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.140999", "step": 2911, "epoch": 2 }, { "type": "loss", "content": 0.017422862350940704, "timestamp": "2025-09-10 02:28:58.164734", "step": 2912, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.194028", "step": 2912, "epoch": 2 }, { "type": "loss", "content": 0.03590952232480049, "timestamp": "2025-09-10 02:28:58.196305", "step": 2913, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.225173", "step": 2913, "epoch": 2 }, { "type": "loss", "content": 0.023626228794455528, "timestamp": "2025-09-10 02:28:58.226937", "step": 2914, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.256424", "step": 2914, "epoch": 2 }, { "type": "loss", "content": 0.010642859153449535, "timestamp": "2025-09-10 02:28:58.259607", "step": 2915, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:58.288670", "step": 2915, "epoch": 2 }, { "type": "loss", "content": 0.05696403607726097, "timestamp": "2025-09-10 02:28:58.312366", "step": 2916, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.342332", "step": 2916, "epoch": 2 }, { "type": "loss", "content": 0.01412131730467081, "timestamp": "2025-09-10 02:28:58.344070", "step": 2917, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.372904", "step": 2917, "epoch": 2 }, { "type": "loss", "content": 0.05304744094610214, "timestamp": "2025-09-10 02:28:58.374757", "step": 2918, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.404993", "step": 2918, "epoch": 2 }, { "type": "loss", "content": 0.01562072616070509, "timestamp": "2025-09-10 02:28:58.407532", "step": 2919, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.436838", "step": 2919, "epoch": 2 }, { "type": "loss", "content": 0.009218626655638218, "timestamp": "2025-09-10 02:28:58.460085", "step": 2920, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.489459", "step": 2920, "epoch": 2 }, { "type": "loss", "content": 0.01481709536164999, "timestamp": "2025-09-10 02:28:58.491772", "step": 2921, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.521016", "step": 2921, "epoch": 2 }, { "type": "loss", "content": 0.0690324604511261, "timestamp": "2025-09-10 02:28:58.522801", "step": 2922, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.552438", "step": 2922, "epoch": 2 }, { "type": "loss", "content": 0.01687428168952465, "timestamp": "2025-09-10 02:28:58.554350", "step": 2923, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.583720", "step": 2923, "epoch": 2 }, { "type": "loss", "content": 0.01327650249004364, "timestamp": "2025-09-10 02:28:58.607284", "step": 2924, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.637218", "step": 2924, "epoch": 2 }, { "type": "loss", "content": 0.06129451468586922, "timestamp": "2025-09-10 02:28:58.639226", "step": 2925, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.668446", "step": 2925, "epoch": 2 }, { "type": "loss", "content": 0.0045118811540305614, "timestamp": "2025-09-10 02:28:58.670476", "step": 2926, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.700156", "step": 2926, "epoch": 2 }, { "type": "loss", "content": 0.021669356152415276, "timestamp": "2025-09-10 02:28:58.702253", "step": 2927, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.731696", "step": 2927, "epoch": 2 }, { "type": "loss", "content": 0.014112145639955997, "timestamp": "2025-09-10 02:28:58.755099", "step": 2928, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.784355", "step": 2928, "epoch": 2 }, { "type": "loss", "content": 0.05611876770853996, "timestamp": "2025-09-10 02:28:58.786122", "step": 2929, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.816116", "step": 2929, "epoch": 2 }, { "type": "loss", "content": 0.03413539007306099, "timestamp": "2025-09-10 02:28:58.817936", "step": 2930, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.847248", "step": 2930, "epoch": 2 }, { "type": "loss", "content": 0.01542122382670641, "timestamp": "2025-09-10 02:28:58.849742", "step": 2931, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.878696", "step": 2931, "epoch": 2 }, { "type": "loss", "content": 0.005345365963876247, "timestamp": "2025-09-10 02:28:58.902846", "step": 2932, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:58.932166", "step": 2932, "epoch": 2 }, { "type": "loss", "content": 0.011457053013145924, "timestamp": "2025-09-10 02:28:58.934240", "step": 2933, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.963628", "step": 2933, "epoch": 2 }, { "type": "loss", "content": 0.014577369205653667, "timestamp": "2025-09-10 02:28:58.965494", "step": 2934, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:58.994637", "step": 2934, "epoch": 2 }, { "type": "loss", "content": 0.007745796348899603, "timestamp": "2025-09-10 02:28:58.996491", "step": 2935, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.025822", "step": 2935, "epoch": 2 }, { "type": "loss", "content": 0.0023972075432538986, "timestamp": "2025-09-10 02:28:59.049329", "step": 2936, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.078851", "step": 2936, "epoch": 2 }, { "type": "loss", "content": 0.001987746451050043, "timestamp": "2025-09-10 02:28:59.082169", "step": 2937, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.114206", "step": 2937, "epoch": 2 }, { "type": "loss", "content": 0.0034970049746334553, "timestamp": "2025-09-10 02:28:59.116306", "step": 2938, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.145744", "step": 2938, "epoch": 2 }, { "type": "loss", "content": 0.03557474911212921, "timestamp": "2025-09-10 02:28:59.148142", "step": 2939, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:59.177218", "step": 2939, "epoch": 2 }, { "type": "loss", "content": 0.021109571680426598, "timestamp": "2025-09-10 02:28:59.200736", "step": 2940, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:59.230277", "step": 2940, "epoch": 2 }, { "type": "loss", "content": 0.013695952482521534, "timestamp": "2025-09-10 02:28:59.232315", "step": 2941, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.262070", "step": 2941, "epoch": 2 }, { "type": "loss", "content": 0.009242476895451546, "timestamp": "2025-09-10 02:28:59.264053", "step": 2942, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.293073", "step": 2942, "epoch": 2 }, { "type": "loss", "content": 0.01628132350742817, "timestamp": "2025-09-10 02:28:59.295009", "step": 2943, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.323915", "step": 2943, "epoch": 2 }, { "type": "loss", "content": 0.009609685279428959, "timestamp": "2025-09-10 02:28:59.347380", "step": 2944, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.376503", "step": 2944, "epoch": 2 }, { "type": "loss", "content": 0.0017871072050184011, "timestamp": "2025-09-10 02:28:59.378538", "step": 2945, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:59.408653", "step": 2945, "epoch": 2 }, { "type": "loss", "content": 0.00531261321157217, "timestamp": "2025-09-10 02:28:59.410629", "step": 2946, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:59.440273", "step": 2946, "epoch": 2 }, { "type": "loss", "content": 0.015613110736012459, "timestamp": "2025-09-10 02:28:59.442267", "step": 2947, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:28:59.471194", "step": 2947, "epoch": 2 }, { "type": "loss", "content": 0.016179624944925308, "timestamp": "2025-09-10 02:28:59.494597", "step": 2948, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.523863", "step": 2948, "epoch": 2 }, { "type": "loss", "content": 0.008063350804150105, "timestamp": "2025-09-10 02:28:59.525741", "step": 2949, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:59.554807", "step": 2949, "epoch": 2 }, { "type": "loss", "content": 0.040376536548137665, "timestamp": "2025-09-10 02:28:59.556551", "step": 2950, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.585999", "step": 2950, "epoch": 2 }, { "type": "loss", "content": 0.016894422471523285, "timestamp": "2025-09-10 02:28:59.587777", "step": 2951, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.616877", "step": 2951, "epoch": 2 }, { "type": "loss", "content": 0.02639775164425373, "timestamp": "2025-09-10 02:28:59.640323", "step": 2952, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.670306", "step": 2952, "epoch": 2 }, { "type": "loss", "content": 0.0056940168142318726, "timestamp": "2025-09-10 02:28:59.672321", "step": 2953, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.701760", "step": 2953, "epoch": 2 }, { "type": "loss", "content": 0.02135222777724266, "timestamp": "2025-09-10 02:28:59.703797", "step": 2954, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.733696", "step": 2954, "epoch": 2 }, { "type": "loss", "content": 0.005273348186165094, "timestamp": "2025-09-10 02:28:59.735431", "step": 2955, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.764454", "step": 2955, "epoch": 2 }, { "type": "loss", "content": 0.011165021918714046, "timestamp": "2025-09-10 02:28:59.787989", "step": 2956, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:28:59.816765", "step": 2956, "epoch": 2 }, { "type": "loss", "content": 0.00407416420057416, "timestamp": "2025-09-10 02:28:59.818763", "step": 2957, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.848055", "step": 2957, "epoch": 2 }, { "type": "loss", "content": 0.004318686667829752, "timestamp": "2025-09-10 02:28:59.850048", "step": 2958, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.879019", "step": 2958, "epoch": 2 }, { "type": "loss", "content": 0.020520392805337906, "timestamp": "2025-09-10 02:28:59.881020", "step": 2959, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.910116", "step": 2959, "epoch": 2 }, { "type": "loss", "content": 0.0023496714420616627, "timestamp": "2025-09-10 02:28:59.933376", "step": 2960, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.962865", "step": 2960, "epoch": 2 }, { "type": "loss", "content": 0.015099072828888893, "timestamp": "2025-09-10 02:28:59.964777", "step": 2961, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:28:59.993299", "step": 2961, "epoch": 2 }, { "type": "loss", "content": 0.014655820094048977, "timestamp": "2025-09-10 02:28:59.995135", "step": 2962, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.024181", "step": 2962, "epoch": 2 }, { "type": "loss", "content": 0.028190674260258675, "timestamp": "2025-09-10 02:29:00.026418", "step": 2963, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.055704", "step": 2963, "epoch": 2 }, { "type": "loss", "content": 0.025873301550745964, "timestamp": "2025-09-10 02:29:00.079092", "step": 2964, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.108617", "step": 2964, "epoch": 2 }, { "type": "loss", "content": 0.04126526787877083, "timestamp": "2025-09-10 02:29:00.110670", "step": 2965, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:00.140624", "step": 2965, "epoch": 2 }, { "type": "loss", "content": 0.027858402580022812, "timestamp": "2025-09-10 02:29:00.142623", "step": 2966, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:00.172072", "step": 2966, "epoch": 2 }, { "type": "loss", "content": 0.012937195599079132, "timestamp": "2025-09-10 02:29:00.174234", "step": 2967, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.203956", "step": 2967, "epoch": 2 }, { "type": "loss", "content": 0.03162816911935806, "timestamp": "2025-09-10 02:29:00.227503", "step": 2968, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:00.256861", "step": 2968, "epoch": 2 }, { "type": "loss", "content": 0.0017554127844050527, "timestamp": "2025-09-10 02:29:00.259241", "step": 2969, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.288293", "step": 2969, "epoch": 2 }, { "type": "loss", "content": 0.002710097935050726, "timestamp": "2025-09-10 02:29:00.290403", "step": 2970, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.319301", "step": 2970, "epoch": 2 }, { "type": "loss", "content": 0.0070211924612522125, "timestamp": "2025-09-10 02:29:00.321030", "step": 2971, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.350530", "step": 2971, "epoch": 2 }, { "type": "loss", "content": 0.0010155083145946264, "timestamp": "2025-09-10 02:29:00.373793", "step": 2972, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.404322", "step": 2972, "epoch": 2 }, { "type": "loss", "content": 0.000555753125809133, "timestamp": "2025-09-10 02:29:00.406028", "step": 2973, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.435097", "step": 2973, "epoch": 2 }, { "type": "loss", "content": 0.013008542358875275, "timestamp": "2025-09-10 02:29:00.437081", "step": 2974, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.466274", "step": 2974, "epoch": 2 }, { "type": "loss", "content": 0.0009575859876349568, "timestamp": "2025-09-10 02:29:00.468451", "step": 2975, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.498197", "step": 2975, "epoch": 2 }, { "type": "loss", "content": 0.0020100839901715517, "timestamp": "2025-09-10 02:29:00.521708", "step": 2976, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.550664", "step": 2976, "epoch": 2 }, { "type": "loss", "content": 0.004602736793458462, "timestamp": "2025-09-10 02:29:00.552447", "step": 2977, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:00.581350", "step": 2977, "epoch": 2 }, { "type": "loss", "content": 0.04919513687491417, "timestamp": "2025-09-10 02:29:00.583279", "step": 2978, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.612169", "step": 2978, "epoch": 2 }, { "type": "loss", "content": 0.0027786530554294586, "timestamp": "2025-09-10 02:29:00.614178", "step": 2979, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.643009", "step": 2979, "epoch": 2 }, { "type": "loss", "content": 0.0006430782377719879, "timestamp": "2025-09-10 02:29:00.666519", "step": 2980, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.696638", "step": 2980, "epoch": 2 }, { "type": "loss", "content": 0.0048445756547153, "timestamp": "2025-09-10 02:29:00.699090", "step": 2981, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.728313", "step": 2981, "epoch": 2 }, { "type": "loss", "content": 0.00392432464286685, "timestamp": "2025-09-10 02:29:00.730293", "step": 2982, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.760052", "step": 2982, "epoch": 2 }, { "type": "loss", "content": 0.006992859300225973, "timestamp": "2025-09-10 02:29:00.762002", "step": 2983, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.791651", "step": 2983, "epoch": 2 }, { "type": "loss", "content": 0.0005739748594351113, "timestamp": "2025-09-10 02:29:00.815112", "step": 2984, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:00.845039", "step": 2984, "epoch": 2 }, { "type": "loss", "content": 0.0016939117340371013, "timestamp": "2025-09-10 02:29:00.847349", "step": 2985, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.876699", "step": 2985, "epoch": 2 }, { "type": "loss", "content": 0.0168430358171463, "timestamp": "2025-09-10 02:29:00.878675", "step": 2986, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.908493", "step": 2986, "epoch": 2 }, { "type": "loss", "content": 0.0028911347035318613, "timestamp": "2025-09-10 02:29:00.910706", "step": 2987, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.939730", "step": 2987, "epoch": 2 }, { "type": "loss", "content": 0.01523431483656168, "timestamp": "2025-09-10 02:29:00.963297", "step": 2988, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:00.992176", "step": 2988, "epoch": 2 }, { "type": "loss", "content": 0.02090136520564556, "timestamp": "2025-09-10 02:29:00.994124", "step": 2989, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.023036", "step": 2989, "epoch": 2 }, { "type": "loss", "content": 0.008178298361599445, "timestamp": "2025-09-10 02:29:01.024788", "step": 2990, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.053576", "step": 2990, "epoch": 2 }, { "type": "loss", "content": 0.016658276319503784, "timestamp": "2025-09-10 02:29:01.055565", "step": 2991, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.084491", "step": 2991, "epoch": 2 }, { "type": "loss", "content": 0.019692067056894302, "timestamp": "2025-09-10 02:29:01.107689", "step": 2992, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.137271", "step": 2992, "epoch": 2 }, { "type": "loss", "content": 0.0005797953344881535, "timestamp": "2025-09-10 02:29:01.139096", "step": 2993, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.168862", "step": 2993, "epoch": 2 }, { "type": "loss", "content": 0.02017974480986595, "timestamp": "2025-09-10 02:29:01.170885", "step": 2994, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:01.201073", "step": 2994, "epoch": 2 }, { "type": "loss", "content": 0.0022124892566353083, "timestamp": "2025-09-10 02:29:01.202844", "step": 2995, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:01.232607", "step": 2995, "epoch": 2 }, { "type": "loss", "content": 0.001253906637430191, "timestamp": "2025-09-10 02:29:01.256218", "step": 2996, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.285086", "step": 2996, "epoch": 2 }, { "type": "loss", "content": 0.010353362187743187, "timestamp": "2025-09-10 02:29:01.287191", "step": 2997, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.316141", "step": 2997, "epoch": 2 }, { "type": "loss", "content": 0.043722331523895264, "timestamp": "2025-09-10 02:29:01.318267", "step": 2998, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:01.347581", "step": 2998, "epoch": 2 }, { "type": "loss", "content": 0.040330640971660614, "timestamp": "2025-09-10 02:29:01.349813", "step": 2999, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:01.378905", "step": 2999, "epoch": 2 }, { "type": "loss", "content": 0.02096661739051342, "timestamp": "2025-09-10 02:29:01.402402", "step": 3000, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 3000", "timestamp": "2025-09-10 02:29:05.933676", "step": 3000, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:05.968961", "step": 3000, "epoch": 2 }, { "type": "loss", "content": 0.004011655226349831, "timestamp": "2025-09-10 02:29:05.970934", "step": 3001, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:06.000028", "step": 3001, "epoch": 2 }, { "type": "loss", "content": 0.023080889135599136, "timestamp": "2025-09-10 02:29:06.002096", "step": 3002, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.031260", "step": 3002, "epoch": 2 }, { "type": "loss", "content": 0.02378918044269085, "timestamp": "2025-09-10 02:29:06.033269", "step": 3003, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.062713", "step": 3003, "epoch": 2 }, { "type": "loss", "content": 0.0014864916447550058, "timestamp": "2025-09-10 02:29:06.086162", "step": 3004, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:06.116439", "step": 3004, "epoch": 2 }, { "type": "loss", "content": 0.0007168581942096353, "timestamp": "2025-09-10 02:29:06.118404", "step": 3005, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.147550", "step": 3005, "epoch": 2 }, { "type": "loss", "content": 0.05206666514277458, "timestamp": "2025-09-10 02:29:06.149596", "step": 3006, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.178886", "step": 3006, "epoch": 2 }, { "type": "loss", "content": 0.019438933581113815, "timestamp": "2025-09-10 02:29:06.180674", "step": 3007, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.209472", "step": 3007, "epoch": 2 }, { "type": "loss", "content": 0.002043353859335184, "timestamp": "2025-09-10 02:29:06.233082", "step": 3008, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.261783", "step": 3008, "epoch": 2 }, { "type": "loss", "content": 0.044223468750715256, "timestamp": "2025-09-10 02:29:06.263836", "step": 3009, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.292836", "step": 3009, "epoch": 2 }, { "type": "loss", "content": 0.0005792967858724296, "timestamp": "2025-09-10 02:29:06.294508", "step": 3010, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.323380", "step": 3010, "epoch": 2 }, { "type": "loss", "content": 0.0050115748308598995, "timestamp": "2025-09-10 02:29:06.325368", "step": 3011, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.354997", "step": 3011, "epoch": 2 }, { "type": "loss", "content": 0.00391251128166914, "timestamp": "2025-09-10 02:29:06.378556", "step": 3012, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.408100", "step": 3012, "epoch": 2 }, { "type": "loss", "content": 0.01394498161971569, "timestamp": "2025-09-10 02:29:06.409939", "step": 3013, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.438616", "step": 3013, "epoch": 2 }, { "type": "loss", "content": 0.003175001125782728, "timestamp": "2025-09-10 02:29:06.440781", "step": 3014, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.469759", "step": 3014, "epoch": 2 }, { "type": "loss", "content": 0.0010710560018196702, "timestamp": "2025-09-10 02:29:06.471591", "step": 3015, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.500640", "step": 3015, "epoch": 2 }, { "type": "loss", "content": 0.013937624171376228, "timestamp": "2025-09-10 02:29:06.524144", "step": 3016, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:06.553247", "step": 3016, "epoch": 2 }, { "type": "loss", "content": 0.000414675276260823, "timestamp": "2025-09-10 02:29:06.556309", "step": 3017, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.585745", "step": 3017, "epoch": 2 }, { "type": "loss", "content": 0.028705088421702385, "timestamp": "2025-09-10 02:29:06.587848", "step": 3018, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.617161", "step": 3018, "epoch": 2 }, { "type": "loss", "content": 0.0010774965630844235, "timestamp": "2025-09-10 02:29:06.619285", "step": 3019, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.648167", "step": 3019, "epoch": 2 }, { "type": "loss", "content": 0.004061760846525431, "timestamp": "2025-09-10 02:29:06.671618", "step": 3020, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:06.700559", "step": 3020, "epoch": 2 }, { "type": "loss", "content": 0.0024131848476827145, "timestamp": "2025-09-10 02:29:06.702470", "step": 3021, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.731890", "step": 3021, "epoch": 2 }, { "type": "loss", "content": 0.0018435295205563307, "timestamp": "2025-09-10 02:29:06.733913", "step": 3022, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.762739", "step": 3022, "epoch": 2 }, { "type": "loss", "content": 0.020958632230758667, "timestamp": "2025-09-10 02:29:06.764810", "step": 3023, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.793547", "step": 3023, "epoch": 2 }, { "type": "loss", "content": 0.014496517367661, "timestamp": "2025-09-10 02:29:06.816949", "step": 3024, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:06.846274", "step": 3024, "epoch": 2 }, { "type": "loss", "content": 0.005536026321351528, "timestamp": "2025-09-10 02:29:06.848073", "step": 3025, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.876949", "step": 3025, "epoch": 2 }, { "type": "loss", "content": 0.0032261740416288376, "timestamp": "2025-09-10 02:29:06.878988", "step": 3026, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.908360", "step": 3026, "epoch": 2 }, { "type": "loss", "content": 0.0034612170420587063, "timestamp": "2025-09-10 02:29:06.910355", "step": 3027, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.940011", "step": 3027, "epoch": 2 }, { "type": "loss", "content": 0.02481548674404621, "timestamp": "2025-09-10 02:29:06.963543", "step": 3028, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:06.992988", "step": 3028, "epoch": 2 }, { "type": "loss", "content": 0.03705863282084465, "timestamp": "2025-09-10 02:29:06.994845", "step": 3029, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:07.023372", "step": 3029, "epoch": 2 }, { "type": "loss", "content": 0.00185360386967659, "timestamp": "2025-09-10 02:29:07.025141", "step": 3030, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:07.054158", "step": 3030, "epoch": 2 }, { "type": "loss", "content": 0.008396097458899021, "timestamp": "2025-09-10 02:29:07.056175", "step": 3031, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:07.085877", "step": 3031, "epoch": 2 }, { "type": "loss", "content": 0.0005369320861063898, "timestamp": "2025-09-10 02:29:07.109145", "step": 3032, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:07.137610", "step": 3032, "epoch": 2 }, { "type": "loss", "content": 0.023257803171873093, "timestamp": "2025-09-10 02:29:07.139434", "step": 3033, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:07.168219", "step": 3033, "epoch": 2 }, { "type": "loss", "content": 0.0015755105996504426, "timestamp": "2025-09-10 02:29:07.170023", "step": 3034, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:07.198916", "step": 3034, "epoch": 2 }, { "type": "loss", "content": 0.00027058369596488774, "timestamp": "2025-09-10 02:29:07.200704", "step": 3035, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:07.229254", "step": 3035, "epoch": 2 }, { "type": "loss", "content": 0.05012970790266991, "timestamp": "2025-09-10 02:29:07.252556", "step": 3036, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:07.281989", "step": 3036, "epoch": 2 }, { "type": "loss", "content": 0.05236015096306801, "timestamp": "2025-09-10 02:29:07.283906", "step": 3037, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:07.312318", "step": 3037, "epoch": 2 }, { "type": "loss", "content": 0.0019132104935124516, "timestamp": "2025-09-10 02:29:07.314272", "step": 3038, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:07.343729", "step": 3038, "epoch": 2 }, { "type": "loss", "content": 0.00043177817133255303, "timestamp": "2025-09-10 02:29:07.345838", "step": 3039, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:07.374576", "step": 3039, "epoch": 2 }, { "type": "loss", "content": 0.010013881139457226, "timestamp": "2025-09-10 02:29:07.397892", "step": 3040, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:29:09.334763", "step": 3040, "epoch": 2 }, { "type": "pplx", "content": 2529260.9904143927, "timestamp": "2025-09-10 02:29:09.336650", "step": 3040, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.364755", "step": 3040, "epoch": 2 }, { "type": "loss", "content": 0.017850453034043312, "timestamp": "2025-09-10 02:29:09.366767", "step": 3041, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:09.396693", "step": 3041, "epoch": 2 }, { "type": "loss", "content": 0.0032058064825832844, "timestamp": "2025-09-10 02:29:09.398667", "step": 3042, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.428534", "step": 3042, "epoch": 2 }, { "type": "loss", "content": 0.0006834662635810673, "timestamp": "2025-09-10 02:29:09.430565", "step": 3043, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.460812", "step": 3043, "epoch": 2 }, { "type": "loss", "content": 0.0007365393685176969, "timestamp": "2025-09-10 02:29:09.484438", "step": 3044, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.513925", "step": 3044, "epoch": 2 }, { "type": "loss", "content": 0.06051953509449959, "timestamp": "2025-09-10 02:29:09.515943", "step": 3045, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:09.544765", "step": 3045, "epoch": 2 }, { "type": "loss", "content": 0.009152843616902828, "timestamp": "2025-09-10 02:29:09.546873", "step": 3046, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.575880", "step": 3046, "epoch": 2 }, { "type": "loss", "content": 0.00657705357298255, "timestamp": "2025-09-10 02:29:09.577795", "step": 3047, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.607046", "step": 3047, "epoch": 2 }, { "type": "loss", "content": 0.0009380311821587384, "timestamp": "2025-09-10 02:29:09.630488", "step": 3048, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.660097", "step": 3048, "epoch": 2 }, { "type": "loss", "content": 0.01918032392859459, "timestamp": "2025-09-10 02:29:09.662208", "step": 3049, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.690970", "step": 3049, "epoch": 2 }, { "type": "loss", "content": 0.007777246180921793, "timestamp": "2025-09-10 02:29:09.693080", "step": 3050, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.722155", "step": 3050, "epoch": 2 }, { "type": "loss", "content": 0.005746803712099791, "timestamp": "2025-09-10 02:29:09.723827", "step": 3051, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.752403", "step": 3051, "epoch": 2 }, { "type": "loss", "content": 0.0005835113115608692, "timestamp": "2025-09-10 02:29:09.775976", "step": 3052, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.805095", "step": 3052, "epoch": 2 }, { "type": "loss", "content": 0.047276873141527176, "timestamp": "2025-09-10 02:29:09.806943", "step": 3053, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.836865", "step": 3053, "epoch": 2 }, { "type": "loss", "content": 0.001643635332584381, "timestamp": "2025-09-10 02:29:09.838670", "step": 3054, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.867705", "step": 3054, "epoch": 2 }, { "type": "loss", "content": 0.005624609533697367, "timestamp": "2025-09-10 02:29:09.869830", "step": 3055, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:09.898985", "step": 3055, "epoch": 2 }, { "type": "loss", "content": 0.00035895127803087234, "timestamp": "2025-09-10 02:29:09.922221", "step": 3056, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:09.955148", "step": 3056, "epoch": 2 }, { "type": "loss", "content": 0.001712158671580255, "timestamp": "2025-09-10 02:29:09.957155", "step": 3057, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:09.986529", "step": 3057, "epoch": 2 }, { "type": "loss", "content": 0.010987815447151661, "timestamp": "2025-09-10 02:29:09.988460", "step": 3058, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:10.017985", "step": 3058, "epoch": 2 }, { "type": "loss", "content": 0.006093787960708141, "timestamp": "2025-09-10 02:29:10.019830", "step": 3059, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.048499", "step": 3059, "epoch": 2 }, { "type": "loss", "content": 0.04451434686779976, "timestamp": "2025-09-10 02:29:10.071966", "step": 3060, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.101624", "step": 3060, "epoch": 2 }, { "type": "loss", "content": 0.009569605812430382, "timestamp": "2025-09-10 02:29:10.103733", "step": 3061, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.132628", "step": 3061, "epoch": 2 }, { "type": "loss", "content": 0.01330816000699997, "timestamp": "2025-09-10 02:29:10.134551", "step": 3062, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.163560", "step": 3062, "epoch": 2 }, { "type": "loss", "content": 0.004482160788029432, "timestamp": "2025-09-10 02:29:10.165285", "step": 3063, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.194604", "step": 3063, "epoch": 2 }, { "type": "loss", "content": 0.0006850509089417756, "timestamp": "2025-09-10 02:29:10.218143", "step": 3064, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.247023", "step": 3064, "epoch": 2 }, { "type": "loss", "content": 0.001920659327879548, "timestamp": "2025-09-10 02:29:10.249046", "step": 3065, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.277988", "step": 3065, "epoch": 2 }, { "type": "loss", "content": 0.009923846460878849, "timestamp": "2025-09-10 02:29:10.279836", "step": 3066, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.308693", "step": 3066, "epoch": 2 }, { "type": "loss", "content": 0.007846372202038765, "timestamp": "2025-09-10 02:29:10.310742", "step": 3067, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.340271", "step": 3067, "epoch": 2 }, { "type": "loss", "content": 0.03626176342368126, "timestamp": "2025-09-10 02:29:10.363868", "step": 3068, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.393584", "step": 3068, "epoch": 2 }, { "type": "loss", "content": 0.00045175960985943675, "timestamp": "2025-09-10 02:29:10.395595", "step": 3069, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.424405", "step": 3069, "epoch": 2 }, { "type": "loss", "content": 0.003350152401253581, "timestamp": "2025-09-10 02:29:10.426402", "step": 3070, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.455862", "step": 3070, "epoch": 2 }, { "type": "loss", "content": 0.031238805502653122, "timestamp": "2025-09-10 02:29:10.457727", "step": 3071, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.486844", "step": 3071, "epoch": 2 }, { "type": "loss", "content": 0.006795932073146105, "timestamp": "2025-09-10 02:29:10.510611", "step": 3072, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.540978", "step": 3072, "epoch": 2 }, { "type": "loss", "content": 0.00045838733785785735, "timestamp": "2025-09-10 02:29:10.543152", "step": 3073, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.572814", "step": 3073, "epoch": 2 }, { "type": "loss", "content": 0.06630359590053558, "timestamp": "2025-09-10 02:29:10.575224", "step": 3074, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:10.604528", "step": 3074, "epoch": 2 }, { "type": "loss", "content": 0.0024545660708099604, "timestamp": "2025-09-10 02:29:10.606815", "step": 3075, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.636991", "step": 3075, "epoch": 2 }, { "type": "loss", "content": 0.001574228866957128, "timestamp": "2025-09-10 02:29:10.660372", "step": 3076, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.689435", "step": 3076, "epoch": 2 }, { "type": "loss", "content": 0.002156176371499896, "timestamp": "2025-09-10 02:29:10.691461", "step": 3077, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.720906", "step": 3077, "epoch": 2 }, { "type": "loss", "content": 0.0014538370305672288, "timestamp": "2025-09-10 02:29:10.722831", "step": 3078, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.751698", "step": 3078, "epoch": 2 }, { "type": "loss", "content": 0.0026938088703900576, "timestamp": "2025-09-10 02:29:10.753379", "step": 3079, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.782567", "step": 3079, "epoch": 2 }, { "type": "loss", "content": 0.0007980852387845516, "timestamp": "2025-09-10 02:29:10.805950", "step": 3080, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.835722", "step": 3080, "epoch": 2 }, { "type": "loss", "content": 0.04152765870094299, "timestamp": "2025-09-10 02:29:10.837454", "step": 3081, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:10.866182", "step": 3081, "epoch": 2 }, { "type": "loss", "content": 0.025887921452522278, "timestamp": "2025-09-10 02:29:10.868036", "step": 3082, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.896838", "step": 3082, "epoch": 2 }, { "type": "loss", "content": 0.03414091467857361, "timestamp": "2025-09-10 02:29:10.898560", "step": 3083, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:10.927043", "step": 3083, "epoch": 2 }, { "type": "loss", "content": 0.01751689985394478, "timestamp": "2025-09-10 02:29:10.950418", "step": 3084, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:10.979233", "step": 3084, "epoch": 2 }, { "type": "loss", "content": 0.002506793709471822, "timestamp": "2025-09-10 02:29:10.981005", "step": 3085, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.010473", "step": 3085, "epoch": 2 }, { "type": "loss", "content": 0.0034624426625669003, "timestamp": "2025-09-10 02:29:11.012345", "step": 3086, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.041702", "step": 3086, "epoch": 2 }, { "type": "loss", "content": 0.003846134291961789, "timestamp": "2025-09-10 02:29:11.043851", "step": 3087, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:11.073319", "step": 3087, "epoch": 2 }, { "type": "loss", "content": 0.001097044674679637, "timestamp": "2025-09-10 02:29:11.096912", "step": 3088, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.126132", "step": 3088, "epoch": 2 }, { "type": "loss", "content": 0.008880463428795338, "timestamp": "2025-09-10 02:29:11.127791", "step": 3089, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.156288", "step": 3089, "epoch": 2 }, { "type": "loss", "content": 0.05466269701719284, "timestamp": "2025-09-10 02:29:11.158283", "step": 3090, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.188170", "step": 3090, "epoch": 2 }, { "type": "loss", "content": 0.00393714802339673, "timestamp": "2025-09-10 02:29:11.191565", "step": 3091, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.223822", "step": 3091, "epoch": 2 }, { "type": "loss", "content": 0.017720578238368034, "timestamp": "2025-09-10 02:29:11.247372", "step": 3092, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.277390", "step": 3092, "epoch": 2 }, { "type": "loss", "content": 0.007730663288384676, "timestamp": "2025-09-10 02:29:11.279485", "step": 3093, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:11.308856", "step": 3093, "epoch": 2 }, { "type": "loss", "content": 0.01206688117235899, "timestamp": "2025-09-10 02:29:11.311077", "step": 3094, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.340516", "step": 3094, "epoch": 2 }, { "type": "loss", "content": 0.006622540298849344, "timestamp": "2025-09-10 02:29:11.342220", "step": 3095, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.371695", "step": 3095, "epoch": 2 }, { "type": "loss", "content": 0.008492736145853996, "timestamp": "2025-09-10 02:29:11.395377", "step": 3096, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.424970", "step": 3096, "epoch": 2 }, { "type": "loss", "content": 0.01140634622424841, "timestamp": "2025-09-10 02:29:11.426795", "step": 3097, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.455624", "step": 3097, "epoch": 2 }, { "type": "loss", "content": 0.025438308715820312, "timestamp": "2025-09-10 02:29:11.457428", "step": 3098, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.486488", "step": 3098, "epoch": 2 }, { "type": "loss", "content": 0.0009789171162992716, "timestamp": "2025-09-10 02:29:11.488393", "step": 3099, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.517402", "step": 3099, "epoch": 2 }, { "type": "loss", "content": 0.0019120399374514818, "timestamp": "2025-09-10 02:29:11.540522", "step": 3100, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.569700", "step": 3100, "epoch": 2 }, { "type": "loss", "content": 0.0009125957149080932, "timestamp": "2025-09-10 02:29:11.571445", "step": 3101, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:11.600430", "step": 3101, "epoch": 2 }, { "type": "loss", "content": 0.03425338864326477, "timestamp": "2025-09-10 02:29:11.602418", "step": 3102, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.631337", "step": 3102, "epoch": 2 }, { "type": "loss", "content": 0.05665156617760658, "timestamp": "2025-09-10 02:29:11.633280", "step": 3103, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:11.662255", "step": 3103, "epoch": 2 }, { "type": "loss", "content": 0.003658808534964919, "timestamp": "2025-09-10 02:29:11.685879", "step": 3104, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.714811", "step": 3104, "epoch": 2 }, { "type": "loss", "content": 0.03745085000991821, "timestamp": "2025-09-10 02:29:11.716590", "step": 3105, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.745536", "step": 3105, "epoch": 2 }, { "type": "loss", "content": 0.0003341633710078895, "timestamp": "2025-09-10 02:29:11.747307", "step": 3106, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.776425", "step": 3106, "epoch": 2 }, { "type": "loss", "content": 0.002007808769121766, "timestamp": "2025-09-10 02:29:11.778426", "step": 3107, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.807517", "step": 3107, "epoch": 2 }, { "type": "loss", "content": 0.005115970969200134, "timestamp": "2025-09-10 02:29:11.831333", "step": 3108, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.860781", "step": 3108, "epoch": 2 }, { "type": "loss", "content": 0.015206390991806984, "timestamp": "2025-09-10 02:29:11.862417", "step": 3109, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.891718", "step": 3109, "epoch": 2 }, { "type": "loss", "content": 0.03465424105525017, "timestamp": "2025-09-10 02:29:11.893485", "step": 3110, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:11.923847", "step": 3110, "epoch": 2 }, { "type": "loss", "content": 0.0011160260764881968, "timestamp": "2025-09-10 02:29:11.925569", "step": 3111, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:11.954453", "step": 3111, "epoch": 2 }, { "type": "loss", "content": 0.005928417202085257, "timestamp": "2025-09-10 02:29:11.978101", "step": 3112, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.009638", "step": 3112, "epoch": 2 }, { "type": "loss", "content": 0.01995454542338848, "timestamp": "2025-09-10 02:29:12.011592", "step": 3113, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.041226", "step": 3113, "epoch": 2 }, { "type": "loss", "content": 0.0028909172397106886, "timestamp": "2025-09-10 02:29:12.043294", "step": 3114, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.072585", "step": 3114, "epoch": 2 }, { "type": "loss", "content": 0.014027678407728672, "timestamp": "2025-09-10 02:29:12.076082", "step": 3115, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.109868", "step": 3115, "epoch": 2 }, { "type": "loss", "content": 0.006129259709268808, "timestamp": "2025-09-10 02:29:12.133368", "step": 3116, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.166686", "step": 3116, "epoch": 2 }, { "type": "loss", "content": 0.0013742366572842002, "timestamp": "2025-09-10 02:29:12.168508", "step": 3117, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.199439", "step": 3117, "epoch": 2 }, { "type": "loss", "content": 0.0012813439825549722, "timestamp": "2025-09-10 02:29:12.202275", "step": 3118, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.232438", "step": 3118, "epoch": 2 }, { "type": "loss", "content": 0.0006782165146432817, "timestamp": "2025-09-10 02:29:12.234435", "step": 3119, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.269415", "step": 3119, "epoch": 2 }, { "type": "loss", "content": 0.004172059241682291, "timestamp": "2025-09-10 02:29:12.295185", "step": 3120, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.327121", "step": 3120, "epoch": 2 }, { "type": "loss", "content": 0.0012155737495049834, "timestamp": "2025-09-10 02:29:12.328845", "step": 3121, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.357861", "step": 3121, "epoch": 2 }, { "type": "loss", "content": 0.02209334261715412, "timestamp": "2025-09-10 02:29:12.360064", "step": 3122, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.389304", "step": 3122, "epoch": 2 }, { "type": "loss", "content": 0.014424113556742668, "timestamp": "2025-09-10 02:29:12.391307", "step": 3123, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.420607", "step": 3123, "epoch": 2 }, { "type": "loss", "content": 0.0037881096359342337, "timestamp": "2025-09-10 02:29:12.443950", "step": 3124, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.476094", "step": 3124, "epoch": 2 }, { "type": "loss", "content": 0.0020416802726686, "timestamp": "2025-09-10 02:29:12.478484", "step": 3125, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.512506", "step": 3125, "epoch": 2 }, { "type": "loss", "content": 0.016164317727088928, "timestamp": "2025-09-10 02:29:12.514475", "step": 3126, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.544233", "step": 3126, "epoch": 2 }, { "type": "loss", "content": 0.025949114933609962, "timestamp": "2025-09-10 02:29:12.546443", "step": 3127, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.576481", "step": 3127, "epoch": 2 }, { "type": "loss", "content": 0.04568003490567207, "timestamp": "2025-09-10 02:29:12.600043", "step": 3128, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.629595", "step": 3128, "epoch": 2 }, { "type": "loss", "content": 0.01625070534646511, "timestamp": "2025-09-10 02:29:12.636823", "step": 3129, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:12.670142", "step": 3129, "epoch": 2 }, { "type": "loss", "content": 0.007189703173935413, "timestamp": "2025-09-10 02:29:12.671999", "step": 3130, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.700552", "step": 3130, "epoch": 2 }, { "type": "loss", "content": 0.00547413295134902, "timestamp": "2025-09-10 02:29:12.702551", "step": 3131, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.731239", "step": 3131, "epoch": 2 }, { "type": "loss", "content": 0.0035093778278678656, "timestamp": "2025-09-10 02:29:12.757388", "step": 3132, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.786869", "step": 3132, "epoch": 2 }, { "type": "loss", "content": 0.002069595968350768, "timestamp": "2025-09-10 02:29:12.788735", "step": 3133, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.817485", "step": 3133, "epoch": 2 }, { "type": "loss", "content": 0.017753830179572105, "timestamp": "2025-09-10 02:29:12.819442", "step": 3134, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.851231", "step": 3134, "epoch": 2 }, { "type": "loss", "content": 0.029736889526247978, "timestamp": "2025-09-10 02:29:12.852979", "step": 3135, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.881858", "step": 3135, "epoch": 2 }, { "type": "loss", "content": 0.002453204710036516, "timestamp": "2025-09-10 02:29:12.907373", "step": 3136, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:12.936718", "step": 3136, "epoch": 2 }, { "type": "loss", "content": 0.005446590483188629, "timestamp": "2025-09-10 02:29:12.938835", "step": 3137, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:12.969332", "step": 3137, "epoch": 2 }, { "type": "loss", "content": 0.022114822641015053, "timestamp": "2025-09-10 02:29:12.971338", "step": 3138, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:13.000291", "step": 3138, "epoch": 2 }, { "type": "loss", "content": 0.0009241848601959646, "timestamp": "2025-09-10 02:29:13.002342", "step": 3139, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:13.032016", "step": 3139, "epoch": 2 }, { "type": "loss", "content": 0.009086194448173046, "timestamp": "2025-09-10 02:29:13.055454", "step": 3140, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.085175", "step": 3140, "epoch": 2 }, { "type": "loss", "content": 0.0011872841278091073, "timestamp": "2025-09-10 02:29:13.086856", "step": 3141, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.116138", "step": 3141, "epoch": 2 }, { "type": "loss", "content": 0.002385688479989767, "timestamp": "2025-09-10 02:29:13.117898", "step": 3142, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.147242", "step": 3142, "epoch": 2 }, { "type": "loss", "content": 0.009739254601299763, "timestamp": "2025-09-10 02:29:13.149260", "step": 3143, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.182971", "step": 3143, "epoch": 2 }, { "type": "loss", "content": 0.01810370199382305, "timestamp": "2025-09-10 02:29:13.206462", "step": 3144, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:13.236033", "step": 3144, "epoch": 2 }, { "type": "loss", "content": 0.006289140321314335, "timestamp": "2025-09-10 02:29:13.238060", "step": 3145, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.267260", "step": 3145, "epoch": 2 }, { "type": "loss", "content": 0.003966487944126129, "timestamp": "2025-09-10 02:29:13.269118", "step": 3146, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.298131", "step": 3146, "epoch": 2 }, { "type": "loss", "content": 0.005891531240195036, "timestamp": "2025-09-10 02:29:13.300236", "step": 3147, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.329705", "step": 3147, "epoch": 2 }, { "type": "loss", "content": 0.002595615340396762, "timestamp": "2025-09-10 02:29:13.353317", "step": 3148, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.383691", "step": 3148, "epoch": 2 }, { "type": "loss", "content": 0.006208585109561682, "timestamp": "2025-09-10 02:29:13.387017", "step": 3149, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.417449", "step": 3149, "epoch": 2 }, { "type": "loss", "content": 0.01005286630243063, "timestamp": "2025-09-10 02:29:13.419480", "step": 3150, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:13.448964", "step": 3150, "epoch": 2 }, { "type": "loss", "content": 0.0018498104764148593, "timestamp": "2025-09-10 02:29:13.451127", "step": 3151, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.481616", "step": 3151, "epoch": 2 }, { "type": "loss", "content": 0.0013147999998182058, "timestamp": "2025-09-10 02:29:13.505335", "step": 3152, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.534788", "step": 3152, "epoch": 2 }, { "type": "loss", "content": 0.03542206436395645, "timestamp": "2025-09-10 02:29:13.538461", "step": 3153, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.567106", "step": 3153, "epoch": 2 }, { "type": "loss", "content": 0.003105960553511977, "timestamp": "2025-09-10 02:29:13.569326", "step": 3154, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.599576", "step": 3154, "epoch": 2 }, { "type": "loss", "content": 0.0005990318604744971, "timestamp": "2025-09-10 02:29:13.601299", "step": 3155, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:13.630475", "step": 3155, "epoch": 2 }, { "type": "loss", "content": 0.0057794926688075066, "timestamp": "2025-09-10 02:29:13.653978", "step": 3156, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:13.687824", "step": 3156, "epoch": 2 }, { "type": "loss", "content": 0.007182592060416937, "timestamp": "2025-09-10 02:29:13.692296", "step": 3157, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.721570", "step": 3157, "epoch": 2 }, { "type": "loss", "content": 0.0039300271309912205, "timestamp": "2025-09-10 02:29:13.723704", "step": 3158, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:13.752248", "step": 3158, "epoch": 2 }, { "type": "loss", "content": 0.013558912090957165, "timestamp": "2025-09-10 02:29:13.756519", "step": 3159, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.785751", "step": 3159, "epoch": 2 }, { "type": "loss", "content": 0.00507943332195282, "timestamp": "2025-09-10 02:29:13.809314", "step": 3160, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.838224", "step": 3160, "epoch": 2 }, { "type": "loss", "content": 0.0003966097719967365, "timestamp": "2025-09-10 02:29:13.841903", "step": 3161, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.873489", "step": 3161, "epoch": 2 }, { "type": "loss", "content": 0.0024395675864070654, "timestamp": "2025-09-10 02:29:13.875363", "step": 3162, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.904415", "step": 3162, "epoch": 2 }, { "type": "loss", "content": 0.004043197724968195, "timestamp": "2025-09-10 02:29:13.906523", "step": 3163, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.935837", "step": 3163, "epoch": 2 }, { "type": "loss", "content": 0.002843863097950816, "timestamp": "2025-09-10 02:29:13.959573", "step": 3164, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:13.989041", "step": 3164, "epoch": 2 }, { "type": "loss", "content": 0.037573374807834625, "timestamp": "2025-09-10 02:29:13.991183", "step": 3165, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.020714", "step": 3165, "epoch": 2 }, { "type": "loss", "content": 0.005158807151019573, "timestamp": "2025-09-10 02:29:14.022916", "step": 3166, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.062017", "step": 3166, "epoch": 2 }, { "type": "loss", "content": 0.00035865677637048066, "timestamp": "2025-09-10 02:29:14.063947", "step": 3167, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.092466", "step": 3167, "epoch": 2 }, { "type": "loss", "content": 0.017983097583055496, "timestamp": "2025-09-10 02:29:14.116099", "step": 3168, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.144899", "step": 3168, "epoch": 2 }, { "type": "loss", "content": 0.04602109640836716, "timestamp": "2025-09-10 02:29:14.146907", "step": 3169, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.175769", "step": 3169, "epoch": 2 }, { "type": "loss", "content": 0.0015665811952203512, "timestamp": "2025-09-10 02:29:14.177484", "step": 3170, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.206385", "step": 3170, "epoch": 2 }, { "type": "loss", "content": 0.0035023889504373074, "timestamp": "2025-09-10 02:29:14.208349", "step": 3171, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.237520", "step": 3171, "epoch": 2 }, { "type": "loss", "content": 0.0018404703587293625, "timestamp": "2025-09-10 02:29:14.262373", "step": 3172, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.291094", "step": 3172, "epoch": 2 }, { "type": "loss", "content": 0.01921190693974495, "timestamp": "2025-09-10 02:29:14.292867", "step": 3173, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:14.321432", "step": 3173, "epoch": 2 }, { "type": "loss", "content": 0.005809496622532606, "timestamp": "2025-09-10 02:29:14.323544", "step": 3174, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.352382", "step": 3174, "epoch": 2 }, { "type": "loss", "content": 0.0011441393289715052, "timestamp": "2025-09-10 02:29:14.354242", "step": 3175, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.383490", "step": 3175, "epoch": 2 }, { "type": "loss", "content": 0.0005539747653529048, "timestamp": "2025-09-10 02:29:14.407176", "step": 3176, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:14.436374", "step": 3176, "epoch": 2 }, { "type": "loss", "content": 0.0010046872776001692, "timestamp": "2025-09-10 02:29:14.438267", "step": 3177, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.466899", "step": 3177, "epoch": 2 }, { "type": "loss", "content": 0.009421751834452152, "timestamp": "2025-09-10 02:29:14.469011", "step": 3178, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.497943", "step": 3178, "epoch": 2 }, { "type": "loss", "content": 0.0006441475707106292, "timestamp": "2025-09-10 02:29:14.500037", "step": 3179, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.528936", "step": 3179, "epoch": 2 }, { "type": "loss", "content": 0.04342738166451454, "timestamp": "2025-09-10 02:29:14.552663", "step": 3180, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.581745", "step": 3180, "epoch": 2 }, { "type": "loss", "content": 0.0749165415763855, "timestamp": "2025-09-10 02:29:14.584038", "step": 3181, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.613276", "step": 3181, "epoch": 2 }, { "type": "loss", "content": 0.0005101025453768671, "timestamp": "2025-09-10 02:29:14.615323", "step": 3182, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.644711", "step": 3182, "epoch": 2 }, { "type": "loss", "content": 0.007324092090129852, "timestamp": "2025-09-10 02:29:14.646424", "step": 3183, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.675040", "step": 3183, "epoch": 2 }, { "type": "loss", "content": 0.06100691482424736, "timestamp": "2025-09-10 02:29:14.698540", "step": 3184, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.728525", "step": 3184, "epoch": 2 }, { "type": "loss", "content": 0.003953658509999514, "timestamp": "2025-09-10 02:29:14.730418", "step": 3185, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:14.759563", "step": 3185, "epoch": 2 }, { "type": "loss", "content": 0.008083908818662167, "timestamp": "2025-09-10 02:29:14.761682", "step": 3186, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.790780", "step": 3186, "epoch": 2 }, { "type": "loss", "content": 0.051983606070280075, "timestamp": "2025-09-10 02:29:14.792681", "step": 3187, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.821914", "step": 3187, "epoch": 2 }, { "type": "loss", "content": 0.0013342405436560512, "timestamp": "2025-09-10 02:29:14.845667", "step": 3188, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.874837", "step": 3188, "epoch": 2 }, { "type": "loss", "content": 0.00035986039438284934, "timestamp": "2025-09-10 02:29:14.878690", "step": 3189, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.909883", "step": 3189, "epoch": 2 }, { "type": "loss", "content": 0.007039155811071396, "timestamp": "2025-09-10 02:29:14.911590", "step": 3190, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.949537", "step": 3190, "epoch": 2 }, { "type": "loss", "content": 0.029422583058476448, "timestamp": "2025-09-10 02:29:14.951515", "step": 3191, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:14.981321", "step": 3191, "epoch": 2 }, { "type": "loss", "content": 0.029646027833223343, "timestamp": "2025-09-10 02:29:15.004893", "step": 3192, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:29:16.917711", "step": 3192, "epoch": 2 }, { "type": "pplx", "content": 2238869.448983728, "timestamp": "2025-09-10 02:29:16.919955", "step": 3192, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:16.948711", "step": 3192, "epoch": 2 }, { "type": "loss", "content": 0.0014405797701328993, "timestamp": "2025-09-10 02:29:16.950660", "step": 3193, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:16.979878", "step": 3193, "epoch": 2 }, { "type": "loss", "content": 0.04845903441309929, "timestamp": "2025-09-10 02:29:16.982010", "step": 3194, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.011306", "step": 3194, "epoch": 2 }, { "type": "loss", "content": 0.00040100738988257945, "timestamp": "2025-09-10 02:29:17.013302", "step": 3195, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.042693", "step": 3195, "epoch": 2 }, { "type": "loss", "content": 0.0234959926456213, "timestamp": "2025-09-10 02:29:17.066436", "step": 3196, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.094820", "step": 3196, "epoch": 2 }, { "type": "loss", "content": 0.0010534286266192794, "timestamp": "2025-09-10 02:29:17.096830", "step": 3197, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.125435", "step": 3197, "epoch": 2 }, { "type": "loss", "content": 0.0017616221448406577, "timestamp": "2025-09-10 02:29:17.127380", "step": 3198, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:17.156707", "step": 3198, "epoch": 2 }, { "type": "loss", "content": 0.011382815428078175, "timestamp": "2025-09-10 02:29:17.158832", "step": 3199, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.188047", "step": 3199, "epoch": 2 }, { "type": "loss", "content": 0.009754389524459839, "timestamp": "2025-09-10 02:29:17.211480", "step": 3200, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.239983", "step": 3200, "epoch": 2 }, { "type": "loss", "content": 0.00555665185675025, "timestamp": "2025-09-10 02:29:17.241751", "step": 3201, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.270341", "step": 3201, "epoch": 2 }, { "type": "loss", "content": 0.00267073349095881, "timestamp": "2025-09-10 02:29:17.272313", "step": 3202, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.301575", "step": 3202, "epoch": 2 }, { "type": "loss", "content": 0.004118208773434162, "timestamp": "2025-09-10 02:29:17.303636", "step": 3203, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.332445", "step": 3203, "epoch": 2 }, { "type": "loss", "content": 0.014600671827793121, "timestamp": "2025-09-10 02:29:17.356133", "step": 3204, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.385121", "step": 3204, "epoch": 2 }, { "type": "loss", "content": 0.003516458673402667, "timestamp": "2025-09-10 02:29:17.388132", "step": 3205, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.419277", "step": 3205, "epoch": 2 }, { "type": "loss", "content": 0.007552871946245432, "timestamp": "2025-09-10 02:29:17.421324", "step": 3206, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.450774", "step": 3206, "epoch": 2 }, { "type": "loss", "content": 0.001352613908238709, "timestamp": "2025-09-10 02:29:17.452703", "step": 3207, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.481562", "step": 3207, "epoch": 2 }, { "type": "loss", "content": 0.005524831358343363, "timestamp": "2025-09-10 02:29:17.505101", "step": 3208, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.534019", "step": 3208, "epoch": 2 }, { "type": "loss", "content": 0.0035007346887141466, "timestamp": "2025-09-10 02:29:17.535912", "step": 3209, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.564767", "step": 3209, "epoch": 2 }, { "type": "loss", "content": 0.019017895683646202, "timestamp": "2025-09-10 02:29:17.566710", "step": 3210, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.595871", "step": 3210, "epoch": 2 }, { "type": "loss", "content": 0.000336126220645383, "timestamp": "2025-09-10 02:29:17.597842", "step": 3211, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.627635", "step": 3211, "epoch": 2 }, { "type": "loss", "content": 0.003399396315217018, "timestamp": "2025-09-10 02:29:17.651049", "step": 3212, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.679881", "step": 3212, "epoch": 2 }, { "type": "loss", "content": 0.003790304297581315, "timestamp": "2025-09-10 02:29:17.681842", "step": 3213, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:17.710718", "step": 3213, "epoch": 2 }, { "type": "loss", "content": 0.02069282904267311, "timestamp": "2025-09-10 02:29:17.712728", "step": 3214, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.741572", "step": 3214, "epoch": 2 }, { "type": "loss", "content": 0.054744090884923935, "timestamp": "2025-09-10 02:29:17.743517", "step": 3215, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.771868", "step": 3215, "epoch": 2 }, { "type": "loss", "content": 0.04676416888833046, "timestamp": "2025-09-10 02:29:17.795407", "step": 3216, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.824307", "step": 3216, "epoch": 2 }, { "type": "loss", "content": 0.030757104977965355, "timestamp": "2025-09-10 02:29:17.826475", "step": 3217, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:17.855866", "step": 3217, "epoch": 2 }, { "type": "loss", "content": 0.026507088914513588, "timestamp": "2025-09-10 02:29:17.857790", "step": 3218, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.886091", "step": 3218, "epoch": 2 }, { "type": "loss", "content": 0.008432825095951557, "timestamp": "2025-09-10 02:29:17.888081", "step": 3219, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.916796", "step": 3219, "epoch": 2 }, { "type": "loss", "content": 0.0012788543244823813, "timestamp": "2025-09-10 02:29:17.940030", "step": 3220, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.969161", "step": 3220, "epoch": 2 }, { "type": "loss", "content": 0.001838106312789023, "timestamp": "2025-09-10 02:29:17.970967", "step": 3221, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:17.999599", "step": 3221, "epoch": 2 }, { "type": "loss", "content": 0.009462250396609306, "timestamp": "2025-09-10 02:29:18.001572", "step": 3222, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.030394", "step": 3222, "epoch": 2 }, { "type": "loss", "content": 0.006963053252547979, "timestamp": "2025-09-10 02:29:18.032590", "step": 3223, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.061360", "step": 3223, "epoch": 2 }, { "type": "loss", "content": 0.04160519689321518, "timestamp": "2025-09-10 02:29:18.085619", "step": 3224, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.115376", "step": 3224, "epoch": 2 }, { "type": "loss", "content": 0.037564489990472794, "timestamp": "2025-09-10 02:29:18.118287", "step": 3225, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.149599", "step": 3225, "epoch": 2 }, { "type": "loss", "content": 0.0014786241808906198, "timestamp": "2025-09-10 02:29:18.151913", "step": 3226, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.180635", "step": 3226, "epoch": 2 }, { "type": "loss", "content": 0.041148122400045395, "timestamp": "2025-09-10 02:29:18.186675", "step": 3227, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:18.218178", "step": 3227, "epoch": 2 }, { "type": "loss", "content": 0.026159171015024185, "timestamp": "2025-09-10 02:29:18.241412", "step": 3228, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:18.272812", "step": 3228, "epoch": 2 }, { "type": "loss", "content": 0.005518940277397633, "timestamp": "2025-09-10 02:29:18.275993", "step": 3229, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.306503", "step": 3229, "epoch": 2 }, { "type": "loss", "content": 0.03149283677339554, "timestamp": "2025-09-10 02:29:18.309682", "step": 3230, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.339620", "step": 3230, "epoch": 2 }, { "type": "loss", "content": 0.004583963192999363, "timestamp": "2025-09-10 02:29:18.341457", "step": 3231, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.374374", "step": 3231, "epoch": 2 }, { "type": "loss", "content": 0.002183470642194152, "timestamp": "2025-09-10 02:29:18.397966", "step": 3232, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:18.429247", "step": 3232, "epoch": 2 }, { "type": "loss", "content": 0.005411368329077959, "timestamp": "2025-09-10 02:29:18.431361", "step": 3233, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.460598", "step": 3233, "epoch": 2 }, { "type": "loss", "content": 0.024356532841920853, "timestamp": "2025-09-10 02:29:18.462430", "step": 3234, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:18.491545", "step": 3234, "epoch": 2 }, { "type": "loss", "content": 0.005300631280988455, "timestamp": "2025-09-10 02:29:18.493598", "step": 3235, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.522878", "step": 3235, "epoch": 2 }, { "type": "loss", "content": 0.0028388681821525097, "timestamp": "2025-09-10 02:29:18.546481", "step": 3236, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.575974", "step": 3236, "epoch": 2 }, { "type": "loss", "content": 0.0018885548925027251, "timestamp": "2025-09-10 02:29:18.577886", "step": 3237, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.606683", "step": 3237, "epoch": 2 }, { "type": "loss", "content": 0.0013392126420512795, "timestamp": "2025-09-10 02:29:18.608355", "step": 3238, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:18.637961", "step": 3238, "epoch": 2 }, { "type": "loss", "content": 0.005057408940047026, "timestamp": "2025-09-10 02:29:18.639796", "step": 3239, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.669231", "step": 3239, "epoch": 2 }, { "type": "loss", "content": 0.06259658187627792, "timestamp": "2025-09-10 02:29:18.693014", "step": 3240, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.726939", "step": 3240, "epoch": 2 }, { "type": "loss", "content": 0.002293581375852227, "timestamp": "2025-09-10 02:29:18.728991", "step": 3241, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:18.763746", "step": 3241, "epoch": 2 }, { "type": "loss", "content": 0.002704497892409563, "timestamp": "2025-09-10 02:29:18.765808", "step": 3242, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.794282", "step": 3242, "epoch": 2 }, { "type": "loss", "content": 0.013686344027519226, "timestamp": "2025-09-10 02:29:18.796458", "step": 3243, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.825726", "step": 3243, "epoch": 2 }, { "type": "loss", "content": 0.040091224014759064, "timestamp": "2025-09-10 02:29:18.849135", "step": 3244, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.878808", "step": 3244, "epoch": 2 }, { "type": "loss", "content": 0.027681689709424973, "timestamp": "2025-09-10 02:29:18.880902", "step": 3245, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.909903", "step": 3245, "epoch": 2 }, { "type": "loss", "content": 0.002253680257126689, "timestamp": "2025-09-10 02:29:18.911965", "step": 3246, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.940435", "step": 3246, "epoch": 2 }, { "type": "loss", "content": 0.0012212129076942801, "timestamp": "2025-09-10 02:29:18.942443", "step": 3247, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:18.971327", "step": 3247, "epoch": 2 }, { "type": "loss", "content": 0.016619175672531128, "timestamp": "2025-09-10 02:29:18.995032", "step": 3248, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.024290", "step": 3248, "epoch": 2 }, { "type": "loss", "content": 0.006138200405985117, "timestamp": "2025-09-10 02:29:19.026251", "step": 3249, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.055531", "step": 3249, "epoch": 2 }, { "type": "loss", "content": 0.0033281215000897646, "timestamp": "2025-09-10 02:29:19.057400", "step": 3250, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.086689", "step": 3250, "epoch": 2 }, { "type": "loss", "content": 0.004846022929996252, "timestamp": "2025-09-10 02:29:19.088714", "step": 3251, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:19.117724", "step": 3251, "epoch": 2 }, { "type": "loss", "content": 0.004795127082616091, "timestamp": "2025-09-10 02:29:19.141395", "step": 3252, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.170436", "step": 3252, "epoch": 2 }, { "type": "loss", "content": 0.023141562938690186, "timestamp": "2025-09-10 02:29:19.172389", "step": 3253, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.201348", "step": 3253, "epoch": 2 }, { "type": "loss", "content": 0.005410187877714634, "timestamp": "2025-09-10 02:29:19.203159", "step": 3254, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.231950", "step": 3254, "epoch": 2 }, { "type": "loss", "content": 0.0019504806259647012, "timestamp": "2025-09-10 02:29:19.234013", "step": 3255, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.264410", "step": 3255, "epoch": 2 }, { "type": "loss", "content": 0.0038690518122166395, "timestamp": "2025-09-10 02:29:19.287874", "step": 3256, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.316997", "step": 3256, "epoch": 2 }, { "type": "loss", "content": 0.0039766039699316025, "timestamp": "2025-09-10 02:29:19.318871", "step": 3257, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:19.347807", "step": 3257, "epoch": 2 }, { "type": "loss", "content": 0.008529971353709698, "timestamp": "2025-09-10 02:29:19.349776", "step": 3258, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.379128", "step": 3258, "epoch": 2 }, { "type": "loss", "content": 0.0007365807541646063, "timestamp": "2025-09-10 02:29:19.381049", "step": 3259, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.409764", "step": 3259, "epoch": 2 }, { "type": "loss", "content": 0.006635564845055342, "timestamp": "2025-09-10 02:29:19.433404", "step": 3260, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.462525", "step": 3260, "epoch": 2 }, { "type": "loss", "content": 0.019494740292429924, "timestamp": "2025-09-10 02:29:19.464652", "step": 3261, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:19.493866", "step": 3261, "epoch": 2 }, { "type": "loss", "content": 0.007422385271638632, "timestamp": "2025-09-10 02:29:19.495885", "step": 3262, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.524614", "step": 3262, "epoch": 2 }, { "type": "loss", "content": 0.016547175124287605, "timestamp": "2025-09-10 02:29:19.526523", "step": 3263, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:19.555385", "step": 3263, "epoch": 2 }, { "type": "loss", "content": 0.0021990910172462463, "timestamp": "2025-09-10 02:29:19.578769", "step": 3264, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:19.607839", "step": 3264, "epoch": 2 }, { "type": "loss", "content": 0.01675112545490265, "timestamp": "2025-09-10 02:29:19.609665", "step": 3265, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.638344", "step": 3265, "epoch": 2 }, { "type": "loss", "content": 0.027797266840934753, "timestamp": "2025-09-10 02:29:19.640283", "step": 3266, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.668865", "step": 3266, "epoch": 2 }, { "type": "loss", "content": 0.019333451986312866, "timestamp": "2025-09-10 02:29:19.670665", "step": 3267, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:19.699287", "step": 3267, "epoch": 2 }, { "type": "loss", "content": 0.02450748346745968, "timestamp": "2025-09-10 02:29:19.723794", "step": 3268, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.752733", "step": 3268, "epoch": 2 }, { "type": "loss", "content": 0.03439665958285332, "timestamp": "2025-09-10 02:29:19.754467", "step": 3269, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.785703", "step": 3269, "epoch": 2 }, { "type": "loss", "content": 0.03972681984305382, "timestamp": "2025-09-10 02:29:19.787744", "step": 3270, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.816911", "step": 3270, "epoch": 2 }, { "type": "loss", "content": 0.003959035966545343, "timestamp": "2025-09-10 02:29:19.818959", "step": 3271, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.848244", "step": 3271, "epoch": 2 }, { "type": "loss", "content": 0.007591621018946171, "timestamp": "2025-09-10 02:29:19.871710", "step": 3272, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.900934", "step": 3272, "epoch": 2 }, { "type": "loss", "content": 0.009627032093703747, "timestamp": "2025-09-10 02:29:19.902951", "step": 3273, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.931807", "step": 3273, "epoch": 2 }, { "type": "loss", "content": 0.006072872783988714, "timestamp": "2025-09-10 02:29:19.933602", "step": 3274, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:19.963636", "step": 3274, "epoch": 2 }, { "type": "loss", "content": 0.0012307605938985944, "timestamp": "2025-09-10 02:29:19.965723", "step": 3275, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:19.994817", "step": 3275, "epoch": 2 }, { "type": "loss", "content": 0.010460141114890575, "timestamp": "2025-09-10 02:29:20.020237", "step": 3276, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.049699", "step": 3276, "epoch": 2 }, { "type": "loss", "content": 0.009266098029911518, "timestamp": "2025-09-10 02:29:20.051892", "step": 3277, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.080640", "step": 3277, "epoch": 2 }, { "type": "loss", "content": 0.001034657354466617, "timestamp": "2025-09-10 02:29:20.082667", "step": 3278, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.111708", "step": 3278, "epoch": 2 }, { "type": "loss", "content": 0.00271918554790318, "timestamp": "2025-09-10 02:29:20.113677", "step": 3279, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.142309", "step": 3279, "epoch": 2 }, { "type": "loss", "content": 0.0036298034247010946, "timestamp": "2025-09-10 02:29:20.165832", "step": 3280, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:20.194624", "step": 3280, "epoch": 2 }, { "type": "loss", "content": 0.0019971595611423254, "timestamp": "2025-09-10 02:29:20.196659", "step": 3281, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.225641", "step": 3281, "epoch": 2 }, { "type": "loss", "content": 0.0014646511990576982, "timestamp": "2025-09-10 02:29:20.227621", "step": 3282, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.256695", "step": 3282, "epoch": 2 }, { "type": "loss", "content": 0.04680074006319046, "timestamp": "2025-09-10 02:29:20.258589", "step": 3283, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.287414", "step": 3283, "epoch": 2 }, { "type": "loss", "content": 0.003970776218920946, "timestamp": "2025-09-10 02:29:20.311023", "step": 3284, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.339930", "step": 3284, "epoch": 2 }, { "type": "loss", "content": 0.0027751787565648556, "timestamp": "2025-09-10 02:29:20.341953", "step": 3285, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.370591", "step": 3285, "epoch": 2 }, { "type": "loss", "content": 0.005944137927144766, "timestamp": "2025-09-10 02:29:20.372485", "step": 3286, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.401444", "step": 3286, "epoch": 2 }, { "type": "loss", "content": 0.0031202025711536407, "timestamp": "2025-09-10 02:29:20.403237", "step": 3287, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.431908", "step": 3287, "epoch": 2 }, { "type": "loss", "content": 0.002764130476862192, "timestamp": "2025-09-10 02:29:20.455458", "step": 3288, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:20.484395", "step": 3288, "epoch": 2 }, { "type": "loss", "content": 0.001558019663207233, "timestamp": "2025-09-10 02:29:20.486441", "step": 3289, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.515587", "step": 3289, "epoch": 2 }, { "type": "loss", "content": 0.0013384289341047406, "timestamp": "2025-09-10 02:29:20.517572", "step": 3290, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.546405", "step": 3290, "epoch": 2 }, { "type": "loss", "content": 0.017970601096749306, "timestamp": "2025-09-10 02:29:20.548489", "step": 3291, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.577071", "step": 3291, "epoch": 2 }, { "type": "loss", "content": 0.0016688795294612646, "timestamp": "2025-09-10 02:29:20.600611", "step": 3292, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:20.629800", "step": 3292, "epoch": 2 }, { "type": "loss", "content": 0.004329177085310221, "timestamp": "2025-09-10 02:29:20.631806", "step": 3293, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.661082", "step": 3293, "epoch": 2 }, { "type": "loss", "content": 0.04710295423865318, "timestamp": "2025-09-10 02:29:20.662935", "step": 3294, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.691674", "step": 3294, "epoch": 2 }, { "type": "loss", "content": 0.01943361759185791, "timestamp": "2025-09-10 02:29:20.693671", "step": 3295, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:20.722401", "step": 3295, "epoch": 2 }, { "type": "loss", "content": 0.030896736308932304, "timestamp": "2025-09-10 02:29:20.745977", "step": 3296, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.774635", "step": 3296, "epoch": 2 }, { "type": "loss", "content": 0.009174938313663006, "timestamp": "2025-09-10 02:29:20.776608", "step": 3297, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.805590", "step": 3297, "epoch": 2 }, { "type": "loss", "content": 0.0020134812220931053, "timestamp": "2025-09-10 02:29:20.807513", "step": 3298, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:20.836466", "step": 3298, "epoch": 2 }, { "type": "loss", "content": 0.007612983230501413, "timestamp": "2025-09-10 02:29:20.838503", "step": 3299, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.876634", "step": 3299, "epoch": 2 }, { "type": "loss", "content": 0.004460458178073168, "timestamp": "2025-09-10 02:29:20.899956", "step": 3300, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.930738", "step": 3300, "epoch": 2 }, { "type": "loss", "content": 0.011855359189212322, "timestamp": "2025-09-10 02:29:20.932666", "step": 3301, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.961791", "step": 3301, "epoch": 2 }, { "type": "loss", "content": 0.0030626137740910053, "timestamp": "2025-09-10 02:29:20.963783", "step": 3302, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:20.993081", "step": 3302, "epoch": 2 }, { "type": "loss", "content": 0.0014373933663591743, "timestamp": "2025-09-10 02:29:20.994947", "step": 3303, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.028196", "step": 3303, "epoch": 2 }, { "type": "loss", "content": 0.0031409133225679398, "timestamp": "2025-09-10 02:29:21.051828", "step": 3304, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.081688", "step": 3304, "epoch": 2 }, { "type": "loss", "content": 0.025139452889561653, "timestamp": "2025-09-10 02:29:21.083765", "step": 3305, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.113113", "step": 3305, "epoch": 2 }, { "type": "loss", "content": 0.00456382567062974, "timestamp": "2025-09-10 02:29:21.115199", "step": 3306, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.145594", "step": 3306, "epoch": 2 }, { "type": "loss", "content": 0.0022305515594780445, "timestamp": "2025-09-10 02:29:21.147349", "step": 3307, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.175544", "step": 3307, "epoch": 2 }, { "type": "loss", "content": 0.012439766898751259, "timestamp": "2025-09-10 02:29:21.199613", "step": 3308, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:21.229185", "step": 3308, "epoch": 2 }, { "type": "loss", "content": 0.05707881227135658, "timestamp": "2025-09-10 02:29:21.230964", "step": 3309, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.259353", "step": 3309, "epoch": 2 }, { "type": "loss", "content": 0.0033739774953573942, "timestamp": "2025-09-10 02:29:21.261142", "step": 3310, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.289987", "step": 3310, "epoch": 2 }, { "type": "loss", "content": 0.00657115550711751, "timestamp": "2025-09-10 02:29:21.291900", "step": 3311, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.319950", "step": 3311, "epoch": 2 }, { "type": "loss", "content": 0.0016374706756323576, "timestamp": "2025-09-10 02:29:21.343304", "step": 3312, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:21.373855", "step": 3312, "epoch": 2 }, { "type": "loss", "content": 0.05689441040158272, "timestamp": "2025-09-10 02:29:21.375788", "step": 3313, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.404298", "step": 3313, "epoch": 2 }, { "type": "loss", "content": 0.00048739396152086556, "timestamp": "2025-09-10 02:29:21.408079", "step": 3314, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.440032", "step": 3314, "epoch": 2 }, { "type": "loss", "content": 0.0032219900749623775, "timestamp": "2025-09-10 02:29:21.441891", "step": 3315, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.470535", "step": 3315, "epoch": 2 }, { "type": "loss", "content": 0.018400685861706734, "timestamp": "2025-09-10 02:29:21.494186", "step": 3316, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.522898", "step": 3316, "epoch": 2 }, { "type": "loss", "content": 0.008098559454083443, "timestamp": "2025-09-10 02:29:21.524809", "step": 3317, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.553205", "step": 3317, "epoch": 2 }, { "type": "loss", "content": 0.00045300991041585803, "timestamp": "2025-09-10 02:29:21.554975", "step": 3318, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.583810", "step": 3318, "epoch": 2 }, { "type": "loss", "content": 0.0014719769824296236, "timestamp": "2025-09-10 02:29:21.586246", "step": 3319, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.615211", "step": 3319, "epoch": 2 }, { "type": "loss", "content": 0.0005743346991948783, "timestamp": "2025-09-10 02:29:21.638931", "step": 3320, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.668418", "step": 3320, "epoch": 2 }, { "type": "loss", "content": 0.001873001572676003, "timestamp": "2025-09-10 02:29:21.670413", "step": 3321, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.701958", "step": 3321, "epoch": 2 }, { "type": "loss", "content": 0.004645978100597858, "timestamp": "2025-09-10 02:29:21.707690", "step": 3322, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.736701", "step": 3322, "epoch": 2 }, { "type": "loss", "content": 0.006194022949784994, "timestamp": "2025-09-10 02:29:21.738440", "step": 3323, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.767046", "step": 3323, "epoch": 2 }, { "type": "loss", "content": 0.06860353797674179, "timestamp": "2025-09-10 02:29:21.793182", "step": 3324, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.822251", "step": 3324, "epoch": 2 }, { "type": "loss", "content": 0.05633767321705818, "timestamp": "2025-09-10 02:29:21.824103", "step": 3325, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.852370", "step": 3325, "epoch": 2 }, { "type": "loss", "content": 0.0016861387994140387, "timestamp": "2025-09-10 02:29:21.854482", "step": 3326, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:29:21.883306", "step": 3326, "epoch": 2 }, { "type": "loss", "content": 0.023309985175728798, "timestamp": "2025-09-10 02:29:21.885907", "step": 3327, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.916141", "step": 3327, "epoch": 2 }, { "type": "loss", "content": 0.001437532133422792, "timestamp": "2025-09-10 02:29:21.939522", "step": 3328, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.968639", "step": 3328, "epoch": 2 }, { "type": "loss", "content": 0.004248501267284155, "timestamp": "2025-09-10 02:29:21.970328", "step": 3329, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:21.998691", "step": 3329, "epoch": 2 }, { "type": "loss", "content": 0.059723686426877975, "timestamp": "2025-09-10 02:29:22.000595", "step": 3330, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.029082", "step": 3330, "epoch": 2 }, { "type": "loss", "content": 0.08785872161388397, "timestamp": "2025-09-10 02:29:22.030873", "step": 3331, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.061330", "step": 3331, "epoch": 2 }, { "type": "loss", "content": 0.011735972948372364, "timestamp": "2025-09-10 02:29:22.093551", "step": 3332, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.145068", "step": 3332, "epoch": 2 }, { "type": "loss", "content": 0.001344985910691321, "timestamp": "2025-09-10 02:29:22.151334", "step": 3333, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:22.185770", "step": 3333, "epoch": 2 }, { "type": "loss", "content": 0.042980439960956573, "timestamp": "2025-09-10 02:29:22.187798", "step": 3334, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.216526", "step": 3334, "epoch": 2 }, { "type": "loss", "content": 0.06018718704581261, "timestamp": "2025-09-10 02:29:22.218495", "step": 3335, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.249027", "step": 3335, "epoch": 2 }, { "type": "loss", "content": 0.016707923263311386, "timestamp": "2025-09-10 02:29:22.272396", "step": 3336, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.301550", "step": 3336, "epoch": 2 }, { "type": "loss", "content": 0.0011664006160572171, "timestamp": "2025-09-10 02:29:22.303260", "step": 3337, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.332099", "step": 3337, "epoch": 2 }, { "type": "loss", "content": 0.000589015893638134, "timestamp": "2025-09-10 02:29:22.336015", "step": 3338, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.364893", "step": 3338, "epoch": 2 }, { "type": "loss", "content": 0.0005039245006628335, "timestamp": "2025-09-10 02:29:22.366881", "step": 3339, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.395372", "step": 3339, "epoch": 2 }, { "type": "loss", "content": 0.0020321940537542105, "timestamp": "2025-09-10 02:29:22.418714", "step": 3340, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.446866", "step": 3340, "epoch": 2 }, { "type": "loss", "content": 0.0007291255169548094, "timestamp": "2025-09-10 02:29:22.450296", "step": 3341, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.479103", "step": 3341, "epoch": 2 }, { "type": "loss", "content": 0.02235017530620098, "timestamp": "2025-09-10 02:29:22.481093", "step": 3342, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.509191", "step": 3342, "epoch": 2 }, { "type": "loss", "content": 0.003876918461173773, "timestamp": "2025-09-10 02:29:22.510993", "step": 3343, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:22.539293", "step": 3343, "epoch": 2 }, { "type": "loss", "content": 0.001926369033753872, "timestamp": "2025-09-10 02:29:22.562946", "step": 3344, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:29:24.434568", "step": 3344, "epoch": 2 }, { "type": "pplx", "content": 3018983.5664545535, "timestamp": "2025-09-10 02:29:24.436472", "step": 3344, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.464549", "step": 3344, "epoch": 2 }, { "type": "loss", "content": 0.028784185647964478, "timestamp": "2025-09-10 02:29:24.466555", "step": 3345, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.495671", "step": 3345, "epoch": 2 }, { "type": "loss", "content": 0.0049285888671875, "timestamp": "2025-09-10 02:29:24.497669", "step": 3346, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:24.527189", "step": 3346, "epoch": 2 }, { "type": "loss", "content": 0.004780747927725315, "timestamp": "2025-09-10 02:29:24.528928", "step": 3347, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.557732", "step": 3347, "epoch": 2 }, { "type": "loss", "content": 0.006687031593173742, "timestamp": "2025-09-10 02:29:24.581511", "step": 3348, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.611395", "step": 3348, "epoch": 2 }, { "type": "loss", "content": 0.008859639056026936, "timestamp": "2025-09-10 02:29:24.613445", "step": 3349, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.643085", "step": 3349, "epoch": 2 }, { "type": "loss", "content": 0.002748832106590271, "timestamp": "2025-09-10 02:29:24.644900", "step": 3350, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:24.673708", "step": 3350, "epoch": 2 }, { "type": "loss", "content": 0.0011055845534428954, "timestamp": "2025-09-10 02:29:24.675564", "step": 3351, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:24.704312", "step": 3351, "epoch": 2 }, { "type": "loss", "content": 0.022624919191002846, "timestamp": "2025-09-10 02:29:24.727930", "step": 3352, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.757199", "step": 3352, "epoch": 2 }, { "type": "loss", "content": 0.00422668969258666, "timestamp": "2025-09-10 02:29:24.758983", "step": 3353, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.787702", "step": 3353, "epoch": 2 }, { "type": "loss", "content": 0.03384550288319588, "timestamp": "2025-09-10 02:29:24.789562", "step": 3354, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.818420", "step": 3354, "epoch": 2 }, { "type": "loss", "content": 0.00036209248355589807, "timestamp": "2025-09-10 02:29:24.820458", "step": 3355, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.849795", "step": 3355, "epoch": 2 }, { "type": "loss", "content": 0.004498614929616451, "timestamp": "2025-09-10 02:29:24.873476", "step": 3356, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.903182", "step": 3356, "epoch": 2 }, { "type": "loss", "content": 0.05434301495552063, "timestamp": "2025-09-10 02:29:24.905219", "step": 3357, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.934709", "step": 3357, "epoch": 2 }, { "type": "loss", "content": 0.0021592320408672094, "timestamp": "2025-09-10 02:29:24.936839", "step": 3358, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:24.966005", "step": 3358, "epoch": 2 }, { "type": "loss", "content": 0.025666924193501472, "timestamp": "2025-09-10 02:29:24.967846", "step": 3359, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:24.996816", "step": 3359, "epoch": 2 }, { "type": "loss", "content": 0.004111967049539089, "timestamp": "2025-09-10 02:29:25.020272", "step": 3360, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:25.049379", "step": 3360, "epoch": 2 }, { "type": "loss", "content": 0.016234159469604492, "timestamp": "2025-09-10 02:29:25.051309", "step": 3361, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.079988", "step": 3361, "epoch": 2 }, { "type": "loss", "content": 0.00322941062040627, "timestamp": "2025-09-10 02:29:25.081983", "step": 3362, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.110693", "step": 3362, "epoch": 2 }, { "type": "loss", "content": 0.009945042431354523, "timestamp": "2025-09-10 02:29:25.112523", "step": 3363, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.141320", "step": 3363, "epoch": 2 }, { "type": "loss", "content": 0.01008332334458828, "timestamp": "2025-09-10 02:29:25.164925", "step": 3364, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.194028", "step": 3364, "epoch": 2 }, { "type": "loss", "content": 0.03316834196448326, "timestamp": "2025-09-10 02:29:25.195770", "step": 3365, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.224273", "step": 3365, "epoch": 2 }, { "type": "loss", "content": 0.031795572489500046, "timestamp": "2025-09-10 02:29:25.226013", "step": 3366, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.254808", "step": 3366, "epoch": 2 }, { "type": "loss", "content": 0.001609918661415577, "timestamp": "2025-09-10 02:29:25.256805", "step": 3367, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.285413", "step": 3367, "epoch": 2 }, { "type": "loss", "content": 0.04161988943815231, "timestamp": "2025-09-10 02:29:25.308826", "step": 3368, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.337829", "step": 3368, "epoch": 2 }, { "type": "loss", "content": 0.006874702405184507, "timestamp": "2025-09-10 02:29:25.340013", "step": 3369, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.368830", "step": 3369, "epoch": 2 }, { "type": "loss", "content": 0.004394261632114649, "timestamp": "2025-09-10 02:29:25.370791", "step": 3370, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.399231", "step": 3370, "epoch": 2 }, { "type": "loss", "content": 0.0316348522901535, "timestamp": "2025-09-10 02:29:25.401226", "step": 3371, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.430278", "step": 3371, "epoch": 2 }, { "type": "loss", "content": 0.017396869137883186, "timestamp": "2025-09-10 02:29:25.453715", "step": 3372, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.483241", "step": 3372, "epoch": 2 }, { "type": "loss", "content": 0.03450683131814003, "timestamp": "2025-09-10 02:29:25.485245", "step": 3373, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.514628", "step": 3373, "epoch": 2 }, { "type": "loss", "content": 0.03623649477958679, "timestamp": "2025-09-10 02:29:25.517827", "step": 3374, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.549368", "step": 3374, "epoch": 2 }, { "type": "loss", "content": 0.010904884897172451, "timestamp": "2025-09-10 02:29:25.551104", "step": 3375, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.579987", "step": 3375, "epoch": 2 }, { "type": "loss", "content": 0.02146339975297451, "timestamp": "2025-09-10 02:29:25.603634", "step": 3376, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.632537", "step": 3376, "epoch": 2 }, { "type": "loss", "content": 0.005433398764580488, "timestamp": "2025-09-10 02:29:25.634841", "step": 3377, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.663655", "step": 3377, "epoch": 2 }, { "type": "loss", "content": 0.0025793889071792364, "timestamp": "2025-09-10 02:29:25.665886", "step": 3378, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.694966", "step": 3378, "epoch": 2 }, { "type": "loss", "content": 0.009922388009727001, "timestamp": "2025-09-10 02:29:25.697144", "step": 3379, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.726561", "step": 3379, "epoch": 2 }, { "type": "loss", "content": 0.007967361249029636, "timestamp": "2025-09-10 02:29:25.749978", "step": 3380, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.778805", "step": 3380, "epoch": 2 }, { "type": "loss", "content": 0.0033568316139280796, "timestamp": "2025-09-10 02:29:25.780780", "step": 3381, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:25.809391", "step": 3381, "epoch": 2 }, { "type": "loss", "content": 0.001999011030420661, "timestamp": "2025-09-10 02:29:25.811136", "step": 3382, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:25.839904", "step": 3382, "epoch": 2 }, { "type": "loss", "content": 0.013739910908043385, "timestamp": "2025-09-10 02:29:25.841594", "step": 3383, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:25.870836", "step": 3383, "epoch": 2 }, { "type": "loss", "content": 0.011606521904468536, "timestamp": "2025-09-10 02:29:25.894285", "step": 3384, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:25.923217", "step": 3384, "epoch": 2 }, { "type": "loss", "content": 0.00559838255867362, "timestamp": "2025-09-10 02:29:25.925016", "step": 3385, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.953905", "step": 3385, "epoch": 2 }, { "type": "loss", "content": 0.0021412342321127653, "timestamp": "2025-09-10 02:29:25.955850", "step": 3386, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:25.984274", "step": 3386, "epoch": 2 }, { "type": "loss", "content": 0.052755001932382584, "timestamp": "2025-09-10 02:29:25.986171", "step": 3387, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:26.014878", "step": 3387, "epoch": 2 }, { "type": "loss", "content": 0.009612246416509151, "timestamp": "2025-09-10 02:29:26.038377", "step": 3388, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.067409", "step": 3388, "epoch": 2 }, { "type": "loss", "content": 0.0032427285332232714, "timestamp": "2025-09-10 02:29:26.069349", "step": 3389, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.098329", "step": 3389, "epoch": 2 }, { "type": "loss", "content": 0.0020761454943567514, "timestamp": "2025-09-10 02:29:26.100297", "step": 3390, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.129374", "step": 3390, "epoch": 2 }, { "type": "loss", "content": 0.018983175978064537, "timestamp": "2025-09-10 02:29:26.131126", "step": 3391, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.159621", "step": 3391, "epoch": 2 }, { "type": "loss", "content": 0.007009588647633791, "timestamp": "2025-09-10 02:29:26.183076", "step": 3392, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.211698", "step": 3392, "epoch": 2 }, { "type": "loss", "content": 0.021478259935975075, "timestamp": "2025-09-10 02:29:26.213656", "step": 3393, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.242584", "step": 3393, "epoch": 2 }, { "type": "loss", "content": 0.001726645277813077, "timestamp": "2025-09-10 02:29:26.244408", "step": 3394, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.272996", "step": 3394, "epoch": 2 }, { "type": "loss", "content": 0.003632990876212716, "timestamp": "2025-09-10 02:29:26.274833", "step": 3395, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.303325", "step": 3395, "epoch": 2 }, { "type": "loss", "content": 0.011844513937830925, "timestamp": "2025-09-10 02:29:26.326613", "step": 3396, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:26.355623", "step": 3396, "epoch": 2 }, { "type": "loss", "content": 0.004450249020010233, "timestamp": "2025-09-10 02:29:26.357462", "step": 3397, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.385908", "step": 3397, "epoch": 2 }, { "type": "loss", "content": 0.003970394842326641, "timestamp": "2025-09-10 02:29:26.387664", "step": 3398, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.416386", "step": 3398, "epoch": 2 }, { "type": "loss", "content": 0.0025803286116570234, "timestamp": "2025-09-10 02:29:26.418209", "step": 3399, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.446717", "step": 3399, "epoch": 2 }, { "type": "loss", "content": 0.014798497781157494, "timestamp": "2025-09-10 02:29:26.470274", "step": 3400, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.499963", "step": 3400, "epoch": 2 }, { "type": "loss", "content": 0.03386901691555977, "timestamp": "2025-09-10 02:29:26.502185", "step": 3401, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.530730", "step": 3401, "epoch": 2 }, { "type": "loss", "content": 0.0016055998858064413, "timestamp": "2025-09-10 02:29:26.532549", "step": 3402, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:26.561165", "step": 3402, "epoch": 2 }, { "type": "loss", "content": 0.016830632463097572, "timestamp": "2025-09-10 02:29:26.563043", "step": 3403, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.591918", "step": 3403, "epoch": 2 }, { "type": "loss", "content": 0.003983858972787857, "timestamp": "2025-09-10 02:29:26.615365", "step": 3404, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.644936", "step": 3404, "epoch": 2 }, { "type": "loss", "content": 0.0032172624487429857, "timestamp": "2025-09-10 02:29:26.646793", "step": 3405, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.675413", "step": 3405, "epoch": 2 }, { "type": "loss", "content": 0.003136297222226858, "timestamp": "2025-09-10 02:29:26.677273", "step": 3406, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.706878", "step": 3406, "epoch": 2 }, { "type": "loss", "content": 0.002121201017871499, "timestamp": "2025-09-10 02:29:26.708762", "step": 3407, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:26.737565", "step": 3407, "epoch": 2 }, { "type": "loss", "content": 0.001867905491963029, "timestamp": "2025-09-10 02:29:26.761068", "step": 3408, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.790238", "step": 3408, "epoch": 2 }, { "type": "loss", "content": 0.03702418878674507, "timestamp": "2025-09-10 02:29:26.792230", "step": 3409, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.821867", "step": 3409, "epoch": 2 }, { "type": "loss", "content": 0.03472406044602394, "timestamp": "2025-09-10 02:29:26.823951", "step": 3410, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.852952", "step": 3410, "epoch": 2 }, { "type": "loss", "content": 0.002561768749728799, "timestamp": "2025-09-10 02:29:26.855021", "step": 3411, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.883989", "step": 3411, "epoch": 2 }, { "type": "loss", "content": 0.06792227923870087, "timestamp": "2025-09-10 02:29:26.907367", "step": 3412, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.936732", "step": 3412, "epoch": 2 }, { "type": "loss", "content": 0.014536969363689423, "timestamp": "2025-09-10 02:29:26.938496", "step": 3413, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:26.967631", "step": 3413, "epoch": 2 }, { "type": "loss", "content": 0.016044307500123978, "timestamp": "2025-09-10 02:29:26.969439", "step": 3414, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:26.998421", "step": 3414, "epoch": 2 }, { "type": "loss", "content": 0.011875098571181297, "timestamp": "2025-09-10 02:29:27.000541", "step": 3415, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.029503", "step": 3415, "epoch": 2 }, { "type": "loss", "content": 0.019096845760941505, "timestamp": "2025-09-10 02:29:27.053076", "step": 3416, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.081695", "step": 3416, "epoch": 2 }, { "type": "loss", "content": 0.0050103445537388325, "timestamp": "2025-09-10 02:29:27.083502", "step": 3417, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.112350", "step": 3417, "epoch": 2 }, { "type": "loss", "content": 0.008464130572974682, "timestamp": "2025-09-10 02:29:27.114344", "step": 3418, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.143577", "step": 3418, "epoch": 2 }, { "type": "loss", "content": 0.010651574470102787, "timestamp": "2025-09-10 02:29:27.145648", "step": 3419, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.174391", "step": 3419, "epoch": 2 }, { "type": "loss", "content": 0.003018937772139907, "timestamp": "2025-09-10 02:29:27.197966", "step": 3420, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.227185", "step": 3420, "epoch": 2 }, { "type": "loss", "content": 0.01083854865282774, "timestamp": "2025-09-10 02:29:27.229090", "step": 3421, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:27.257941", "step": 3421, "epoch": 2 }, { "type": "loss", "content": 0.003781322157010436, "timestamp": "2025-09-10 02:29:27.259789", "step": 3422, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.289412", "step": 3422, "epoch": 2 }, { "type": "loss", "content": 0.002141769276931882, "timestamp": "2025-09-10 02:29:27.291203", "step": 3423, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.320929", "step": 3423, "epoch": 2 }, { "type": "loss", "content": 0.0017488569719716907, "timestamp": "2025-09-10 02:29:27.344572", "step": 3424, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.374392", "step": 3424, "epoch": 2 }, { "type": "loss", "content": 0.003756160382181406, "timestamp": "2025-09-10 02:29:27.376298", "step": 3425, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.405695", "step": 3425, "epoch": 2 }, { "type": "loss", "content": 0.0471893735229969, "timestamp": "2025-09-10 02:29:27.407774", "step": 3426, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:27.436568", "step": 3426, "epoch": 2 }, { "type": "loss", "content": 0.001391625963151455, "timestamp": "2025-09-10 02:29:27.438477", "step": 3427, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:27.467392", "step": 3427, "epoch": 2 }, { "type": "loss", "content": 0.0011366669787093997, "timestamp": "2025-09-10 02:29:27.490924", "step": 3428, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.519508", "step": 3428, "epoch": 2 }, { "type": "loss", "content": 0.029718205332756042, "timestamp": "2025-09-10 02:29:27.521624", "step": 3429, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.550042", "step": 3429, "epoch": 2 }, { "type": "loss", "content": 0.0040368977934122086, "timestamp": "2025-09-10 02:29:27.552078", "step": 3430, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.580801", "step": 3430, "epoch": 2 }, { "type": "loss", "content": 0.029250582680106163, "timestamp": "2025-09-10 02:29:27.582681", "step": 3431, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:27.611238", "step": 3431, "epoch": 2 }, { "type": "loss", "content": 0.001969092758372426, "timestamp": "2025-09-10 02:29:27.636732", "step": 3432, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.665333", "step": 3432, "epoch": 2 }, { "type": "loss", "content": 0.019174734130501747, "timestamp": "2025-09-10 02:29:27.671073", "step": 3433, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.700002", "step": 3433, "epoch": 2 }, { "type": "loss", "content": 0.002094629453495145, "timestamp": "2025-09-10 02:29:27.701941", "step": 3434, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.744666", "step": 3434, "epoch": 2 }, { "type": "loss", "content": 0.010229520499706268, "timestamp": "2025-09-10 02:29:27.746496", "step": 3435, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:27.775354", "step": 3435, "epoch": 2 }, { "type": "loss", "content": 0.00620575575158, "timestamp": "2025-09-10 02:29:27.799553", "step": 3436, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.828802", "step": 3436, "epoch": 2 }, { "type": "loss", "content": 0.003741190303117037, "timestamp": "2025-09-10 02:29:27.830670", "step": 3437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.859548", "step": 3437, "epoch": 2 }, { "type": "loss", "content": 0.0019687057938426733, "timestamp": "2025-09-10 02:29:27.861390", "step": 3438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.890290", "step": 3438, "epoch": 2 }, { "type": "loss", "content": 0.002562493784353137, "timestamp": "2025-09-10 02:29:27.893947", "step": 3439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.922961", "step": 3439, "epoch": 2 }, { "type": "loss", "content": 0.002744677709415555, "timestamp": "2025-09-10 02:29:27.946368", "step": 3440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:27.975027", "step": 3440, "epoch": 2 }, { "type": "loss", "content": 0.029536891728639603, "timestamp": "2025-09-10 02:29:27.976911", "step": 3441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.005981", "step": 3441, "epoch": 2 }, { "type": "loss", "content": 0.0070204222574830055, "timestamp": "2025-09-10 02:29:28.007875", "step": 3442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.037034", "step": 3442, "epoch": 2 }, { "type": "loss", "content": 0.0031619048677384853, "timestamp": "2025-09-10 02:29:28.038971", "step": 3443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.067874", "step": 3443, "epoch": 2 }, { "type": "loss", "content": 0.012876972556114197, "timestamp": "2025-09-10 02:29:28.094598", "step": 3444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.123625", "step": 3444, "epoch": 2 }, { "type": "loss", "content": 0.03683038428425789, "timestamp": "2025-09-10 02:29:28.126352", "step": 3445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.161716", "step": 3445, "epoch": 2 }, { "type": "loss", "content": 0.009324649348855019, "timestamp": "2025-09-10 02:29:28.163583", "step": 3446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.192594", "step": 3446, "epoch": 2 }, { "type": "loss", "content": 0.023574920371174812, "timestamp": "2025-09-10 02:29:28.194532", "step": 3447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.223050", "step": 3447, "epoch": 2 }, { "type": "loss", "content": 0.0015393022913485765, "timestamp": "2025-09-10 02:29:28.246362", "step": 3448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.275267", "step": 3448, "epoch": 2 }, { "type": "loss", "content": 0.006074630655348301, "timestamp": "2025-09-10 02:29:28.277214", "step": 3449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:28.306017", "step": 3449, "epoch": 2 }, { "type": "loss", "content": 0.0035448498092591763, "timestamp": "2025-09-10 02:29:28.308477", "step": 3450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.338933", "step": 3450, "epoch": 2 }, { "type": "loss", "content": 0.009274295531213284, "timestamp": "2025-09-10 02:29:28.340891", "step": 3451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.369529", "step": 3451, "epoch": 2 }, { "type": "loss", "content": 0.0013662822311744094, "timestamp": "2025-09-10 02:29:28.393039", "step": 3452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:28.422352", "step": 3452, "epoch": 2 }, { "type": "loss", "content": 0.049186598509550095, "timestamp": "2025-09-10 02:29:28.426418", "step": 3453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.458183", "step": 3453, "epoch": 2 }, { "type": "loss", "content": 0.0017654149560257792, "timestamp": "2025-09-10 02:29:28.460078", "step": 3454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.489029", "step": 3454, "epoch": 2 }, { "type": "loss", "content": 0.0012628829572349787, "timestamp": "2025-09-10 02:29:28.490998", "step": 3455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:28.520455", "step": 3455, "epoch": 2 }, { "type": "loss", "content": 0.0014435934135690331, "timestamp": "2025-09-10 02:29:28.545666", "step": 3456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.574820", "step": 3456, "epoch": 2 }, { "type": "loss", "content": 0.009639963507652283, "timestamp": "2025-09-10 02:29:28.576435", "step": 3457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.605398", "step": 3457, "epoch": 2 }, { "type": "loss", "content": 0.013706199824810028, "timestamp": "2025-09-10 02:29:28.607646", "step": 3458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.636564", "step": 3458, "epoch": 2 }, { "type": "loss", "content": 0.02213672362267971, "timestamp": "2025-09-10 02:29:28.638596", "step": 3459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.667385", "step": 3459, "epoch": 2 }, { "type": "loss", "content": 0.004982142709195614, "timestamp": "2025-09-10 02:29:28.690824", "step": 3460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.719813", "step": 3460, "epoch": 2 }, { "type": "loss", "content": 0.022070636972784996, "timestamp": "2025-09-10 02:29:28.721646", "step": 3461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.750575", "step": 3461, "epoch": 2 }, { "type": "loss", "content": 0.004302029497921467, "timestamp": "2025-09-10 02:29:28.752666", "step": 3462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:28.781357", "step": 3462, "epoch": 2 }, { "type": "loss", "content": 0.006409894675016403, "timestamp": "2025-09-10 02:29:28.783335", "step": 3463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.812156", "step": 3463, "epoch": 2 }, { "type": "loss", "content": 0.01828247867524624, "timestamp": "2025-09-10 02:29:28.835624", "step": 3464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.864026", "step": 3464, "epoch": 2 }, { "type": "loss", "content": 0.0010449145920574665, "timestamp": "2025-09-10 02:29:28.865961", "step": 3465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.894868", "step": 3465, "epoch": 2 }, { "type": "loss", "content": 0.0005158040439710021, "timestamp": "2025-09-10 02:29:28.896894", "step": 3466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.925581", "step": 3466, "epoch": 2 }, { "type": "loss", "content": 0.0013656012015417218, "timestamp": "2025-09-10 02:29:28.927576", "step": 3467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:28.955832", "step": 3467, "epoch": 2 }, { "type": "loss", "content": 0.008575326763093472, "timestamp": "2025-09-10 02:29:28.979407", "step": 3468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:29.008062", "step": 3468, "epoch": 2 }, { "type": "loss", "content": 0.0006952978437766433, "timestamp": "2025-09-10 02:29:29.010079", "step": 3469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.038443", "step": 3469, "epoch": 2 }, { "type": "loss", "content": 0.0007703721639700234, "timestamp": "2025-09-10 02:29:29.040388", "step": 3470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.069135", "step": 3470, "epoch": 2 }, { "type": "loss", "content": 0.02380296401679516, "timestamp": "2025-09-10 02:29:29.071078", "step": 3471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.099795", "step": 3471, "epoch": 2 }, { "type": "loss", "content": 0.0008592799422331154, "timestamp": "2025-09-10 02:29:29.123363", "step": 3472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.152575", "step": 3472, "epoch": 2 }, { "type": "loss", "content": 0.0031577313784509897, "timestamp": "2025-09-10 02:29:29.154603", "step": 3473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:29.184758", "step": 3473, "epoch": 2 }, { "type": "loss", "content": 0.0005181062733754516, "timestamp": "2025-09-10 02:29:29.186931", "step": 3474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.215732", "step": 3474, "epoch": 2 }, { "type": "loss", "content": 0.003346957266330719, "timestamp": "2025-09-10 02:29:29.217573", "step": 3475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.246181", "step": 3475, "epoch": 2 }, { "type": "loss", "content": 0.045351382344961166, "timestamp": "2025-09-10 02:29:29.269556", "step": 3476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.298574", "step": 3476, "epoch": 2 }, { "type": "loss", "content": 0.008699034340679646, "timestamp": "2025-09-10 02:29:29.300537", "step": 3477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:29.329406", "step": 3477, "epoch": 2 }, { "type": "loss", "content": 0.021816948428750038, "timestamp": "2025-09-10 02:29:29.331524", "step": 3478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.360328", "step": 3478, "epoch": 2 }, { "type": "loss", "content": 0.005231685936450958, "timestamp": "2025-09-10 02:29:29.362178", "step": 3479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.391352", "step": 3479, "epoch": 2 }, { "type": "loss", "content": 0.0012015464017167687, "timestamp": "2025-09-10 02:29:29.414803", "step": 3480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.443695", "step": 3480, "epoch": 2 }, { "type": "loss", "content": 0.00045789359137415886, "timestamp": "2025-09-10 02:29:29.445627", "step": 3481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.474691", "step": 3481, "epoch": 2 }, { "type": "loss", "content": 0.0006896085687913001, "timestamp": "2025-09-10 02:29:29.476601", "step": 3482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.505179", "step": 3482, "epoch": 2 }, { "type": "loss", "content": 0.0012523357290774584, "timestamp": "2025-09-10 02:29:29.507093", "step": 3483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.535753", "step": 3483, "epoch": 2 }, { "type": "loss", "content": 0.0377359464764595, "timestamp": "2025-09-10 02:29:29.559124", "step": 3484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.588202", "step": 3484, "epoch": 2 }, { "type": "loss", "content": 0.006449830252677202, "timestamp": "2025-09-10 02:29:29.590313", "step": 3485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.619051", "step": 3485, "epoch": 2 }, { "type": "loss", "content": 0.001721881446428597, "timestamp": "2025-09-10 02:29:29.621109", "step": 3486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:29.649769", "step": 3486, "epoch": 2 }, { "type": "loss", "content": 0.043385621160268784, "timestamp": "2025-09-10 02:29:29.651931", "step": 3487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.680690", "step": 3487, "epoch": 2 }, { "type": "loss", "content": 0.00024432101054117084, "timestamp": "2025-09-10 02:29:29.704387", "step": 3488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.733473", "step": 3488, "epoch": 2 }, { "type": "loss", "content": 0.032618287950754166, "timestamp": "2025-09-10 02:29:29.735503", "step": 3489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:29.764177", "step": 3489, "epoch": 2 }, { "type": "loss", "content": 0.025582995265722275, "timestamp": "2025-09-10 02:29:29.766019", "step": 3490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.794705", "step": 3490, "epoch": 2 }, { "type": "loss", "content": 0.03573060408234596, "timestamp": "2025-09-10 02:29:29.796596", "step": 3491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.825257", "step": 3491, "epoch": 2 }, { "type": "loss", "content": 0.0006821600836701691, "timestamp": "2025-09-10 02:29:29.850923", "step": 3492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.883501", "step": 3492, "epoch": 2 }, { "type": "loss", "content": 0.000552928657270968, "timestamp": "2025-09-10 02:29:29.885544", "step": 3493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.918156", "step": 3493, "epoch": 2 }, { "type": "loss", "content": 0.037706244736909866, "timestamp": "2025-09-10 02:29:29.926313", "step": 3494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.955427", "step": 3494, "epoch": 2 }, { "type": "loss", "content": 0.0017515952931717038, "timestamp": "2025-09-10 02:29:29.957296", "step": 3495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:29.986401", "step": 3495, "epoch": 2 }, { "type": "loss", "content": 0.0004756085982080549, "timestamp": "2025-09-10 02:29:30.012731", "step": 3496, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:29:32.030591", "step": 3496, "epoch": 2 }, { "type": "pplx", "content": 2477887.027473765, "timestamp": "2025-09-10 02:29:32.032529", "step": 3496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:32.059958", "step": 3496, "epoch": 2 }, { "type": "loss", "content": 0.001944307005032897, "timestamp": "2025-09-10 02:29:32.062180", "step": 3497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:32.090925", "step": 3497, "epoch": 2 }, { "type": "loss", "content": 0.0046856580302119255, "timestamp": "2025-09-10 02:29:32.092885", "step": 3498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:32.121881", "step": 3498, "epoch": 2 }, { "type": "loss", "content": 0.04017262905836105, "timestamp": "2025-09-10 02:29:32.123950", "step": 3499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:32.152564", "step": 3499, "epoch": 2 }, { "type": "loss", "content": 0.0051589831709861755, "timestamp": "2025-09-10 02:29:32.176088", "step": 3500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 3500", "timestamp": "2025-09-10 02:29:36.516402", "step": 3500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.547565", "step": 3500, "epoch": 2 }, { "type": "loss", "content": 0.0038563567213714123, "timestamp": "2025-09-10 02:29:36.549556", "step": 3501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.578524", "step": 3501, "epoch": 2 }, { "type": "loss", "content": 0.004655842669308186, "timestamp": "2025-09-10 02:29:36.580537", "step": 3502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.608966", "step": 3502, "epoch": 2 }, { "type": "loss", "content": 0.0008553729276172817, "timestamp": "2025-09-10 02:29:36.610908", "step": 3503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.639406", "step": 3503, "epoch": 2 }, { "type": "loss", "content": 0.00034527681418694556, "timestamp": "2025-09-10 02:29:36.663059", "step": 3504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.692017", "step": 3504, "epoch": 2 }, { "type": "loss", "content": 0.00033239685581065714, "timestamp": "2025-09-10 02:29:36.694041", "step": 3505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.722970", "step": 3505, "epoch": 2 }, { "type": "loss", "content": 0.013092617504298687, "timestamp": "2025-09-10 02:29:36.725097", "step": 3506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:36.753859", "step": 3506, "epoch": 2 }, { "type": "loss", "content": 0.003901219693943858, "timestamp": "2025-09-10 02:29:36.755976", "step": 3507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.784651", "step": 3507, "epoch": 2 }, { "type": "loss", "content": 0.0010723823215812445, "timestamp": "2025-09-10 02:29:36.808105", "step": 3508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.838189", "step": 3508, "epoch": 2 }, { "type": "loss", "content": 0.0008635511621832848, "timestamp": "2025-09-10 02:29:36.840762", "step": 3509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.869860", "step": 3509, "epoch": 2 }, { "type": "loss", "content": 0.0037847573403269053, "timestamp": "2025-09-10 02:29:36.872004", "step": 3510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.900657", "step": 3510, "epoch": 2 }, { "type": "loss", "content": 0.0011993770021945238, "timestamp": "2025-09-10 02:29:36.902912", "step": 3511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.931867", "step": 3511, "epoch": 2 }, { "type": "loss", "content": 0.07670789211988449, "timestamp": "2025-09-10 02:29:36.955387", "step": 3512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:36.984640", "step": 3512, "epoch": 2 }, { "type": "loss", "content": 0.0012009853962808847, "timestamp": "2025-09-10 02:29:36.986578", "step": 3513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.015164", "step": 3513, "epoch": 2 }, { "type": "loss", "content": 0.004377874545753002, "timestamp": "2025-09-10 02:29:37.017174", "step": 3514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:37.045656", "step": 3514, "epoch": 2 }, { "type": "loss", "content": 0.001754789613187313, "timestamp": "2025-09-10 02:29:37.047481", "step": 3515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.076835", "step": 3515, "epoch": 2 }, { "type": "loss", "content": 0.007395769469439983, "timestamp": "2025-09-10 02:29:37.100299", "step": 3516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.129094", "step": 3516, "epoch": 2 }, { "type": "loss", "content": 0.000709313084371388, "timestamp": "2025-09-10 02:29:37.131152", "step": 3517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.159892", "step": 3517, "epoch": 2 }, { "type": "loss", "content": 0.0031018254812806845, "timestamp": "2025-09-10 02:29:37.161784", "step": 3518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.190375", "step": 3518, "epoch": 2 }, { "type": "loss", "content": 0.000283032248262316, "timestamp": "2025-09-10 02:29:37.192222", "step": 3519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.220567", "step": 3519, "epoch": 2 }, { "type": "loss", "content": 0.0018501004669815302, "timestamp": "2025-09-10 02:29:37.244068", "step": 3520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.273199", "step": 3520, "epoch": 2 }, { "type": "loss", "content": 0.02471715770661831, "timestamp": "2025-09-10 02:29:37.275160", "step": 3521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:37.303703", "step": 3521, "epoch": 2 }, { "type": "loss", "content": 0.0008763830992393196, "timestamp": "2025-09-10 02:29:37.307100", "step": 3522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.335803", "step": 3522, "epoch": 2 }, { "type": "loss", "content": 0.0005971781210973859, "timestamp": "2025-09-10 02:29:37.337669", "step": 3523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.366449", "step": 3523, "epoch": 2 }, { "type": "loss", "content": 0.024703845381736755, "timestamp": "2025-09-10 02:29:37.390283", "step": 3524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.420164", "step": 3524, "epoch": 2 }, { "type": "loss", "content": 0.0002873912744689733, "timestamp": "2025-09-10 02:29:37.422345", "step": 3525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.451208", "step": 3525, "epoch": 2 }, { "type": "loss", "content": 0.004364865832030773, "timestamp": "2025-09-10 02:29:37.456125", "step": 3526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:37.485447", "step": 3526, "epoch": 2 }, { "type": "loss", "content": 0.0008310721605084836, "timestamp": "2025-09-10 02:29:37.487392", "step": 3527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.516317", "step": 3527, "epoch": 2 }, { "type": "loss", "content": 0.0021310069132596254, "timestamp": "2025-09-10 02:29:37.539554", "step": 3528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.568742", "step": 3528, "epoch": 2 }, { "type": "loss", "content": 0.014144135639071465, "timestamp": "2025-09-10 02:29:37.570781", "step": 3529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.600811", "step": 3529, "epoch": 2 }, { "type": "loss", "content": 0.012850276194512844, "timestamp": "2025-09-10 02:29:37.602796", "step": 3530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.631702", "step": 3530, "epoch": 2 }, { "type": "loss", "content": 0.05908694490790367, "timestamp": "2025-09-10 02:29:37.634020", "step": 3531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.663433", "step": 3531, "epoch": 2 }, { "type": "loss", "content": 0.01407054252922535, "timestamp": "2025-09-10 02:29:37.686971", "step": 3532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.716974", "step": 3532, "epoch": 2 }, { "type": "loss", "content": 0.0031040941830724478, "timestamp": "2025-09-10 02:29:37.718962", "step": 3533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:37.748567", "step": 3533, "epoch": 2 }, { "type": "loss", "content": 0.003990839701145887, "timestamp": "2025-09-10 02:29:37.752014", "step": 3534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.784899", "step": 3534, "epoch": 2 }, { "type": "loss", "content": 0.022437114268541336, "timestamp": "2025-09-10 02:29:37.791828", "step": 3535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.822638", "step": 3535, "epoch": 2 }, { "type": "loss", "content": 0.001835890463553369, "timestamp": "2025-09-10 02:29:37.846092", "step": 3536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.875932", "step": 3536, "epoch": 2 }, { "type": "loss", "content": 0.04690726473927498, "timestamp": "2025-09-10 02:29:37.882277", "step": 3537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.911826", "step": 3537, "epoch": 2 }, { "type": "loss", "content": 0.0009008381748571992, "timestamp": "2025-09-10 02:29:37.913927", "step": 3538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:37.945715", "step": 3538, "epoch": 2 }, { "type": "loss", "content": 0.003957767505198717, "timestamp": "2025-09-10 02:29:37.947690", "step": 3539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:37.976246", "step": 3539, "epoch": 2 }, { "type": "loss", "content": 0.003778811078518629, "timestamp": "2025-09-10 02:29:38.000291", "step": 3540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:38.029685", "step": 3540, "epoch": 2 }, { "type": "loss", "content": 0.0017288854578509927, "timestamp": "2025-09-10 02:29:38.031784", "step": 3541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:38.063693", "step": 3541, "epoch": 2 }, { "type": "loss", "content": 0.0164285097271204, "timestamp": "2025-09-10 02:29:38.065775", "step": 3542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.095185", "step": 3542, "epoch": 2 }, { "type": "loss", "content": 0.0016188893932849169, "timestamp": "2025-09-10 02:29:38.097295", "step": 3543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:38.127092", "step": 3543, "epoch": 2 }, { "type": "loss", "content": 0.0013423208147287369, "timestamp": "2025-09-10 02:29:38.154809", "step": 3544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.183616", "step": 3544, "epoch": 2 }, { "type": "loss", "content": 0.006067909765988588, "timestamp": "2025-09-10 02:29:38.185664", "step": 3545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.219929", "step": 3545, "epoch": 2 }, { "type": "loss", "content": 0.040989816188812256, "timestamp": "2025-09-10 02:29:38.222035", "step": 3546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.251384", "step": 3546, "epoch": 2 }, { "type": "loss", "content": 0.0001859536860138178, "timestamp": "2025-09-10 02:29:38.253232", "step": 3547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.282909", "step": 3547, "epoch": 2 }, { "type": "loss", "content": 0.028182286769151688, "timestamp": "2025-09-10 02:29:38.306464", "step": 3548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.335805", "step": 3548, "epoch": 2 }, { "type": "loss", "content": 0.00029088411247357726, "timestamp": "2025-09-10 02:29:38.337863", "step": 3549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.372903", "step": 3549, "epoch": 2 }, { "type": "loss", "content": 0.0074108196422457695, "timestamp": "2025-09-10 02:29:38.374929", "step": 3550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.405216", "step": 3550, "epoch": 2 }, { "type": "loss", "content": 0.014489994384348392, "timestamp": "2025-09-10 02:29:38.407397", "step": 3551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.436446", "step": 3551, "epoch": 2 }, { "type": "loss", "content": 0.006930240895599127, "timestamp": "2025-09-10 02:29:38.460075", "step": 3552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.489110", "step": 3552, "epoch": 2 }, { "type": "loss", "content": 0.004822719842195511, "timestamp": "2025-09-10 02:29:38.492904", "step": 3553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.522591", "step": 3553, "epoch": 2 }, { "type": "loss", "content": 0.01741173304617405, "timestamp": "2025-09-10 02:29:38.524799", "step": 3554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:38.554263", "step": 3554, "epoch": 2 }, { "type": "loss", "content": 0.013965368270874023, "timestamp": "2025-09-10 02:29:38.556134", "step": 3555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:38.585763", "step": 3555, "epoch": 2 }, { "type": "loss", "content": 0.0038411770947277546, "timestamp": "2025-09-10 02:29:38.609351", "step": 3556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:38.639562", "step": 3556, "epoch": 2 }, { "type": "loss", "content": 0.00871545635163784, "timestamp": "2025-09-10 02:29:38.641667", "step": 3557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.673246", "step": 3557, "epoch": 2 }, { "type": "loss", "content": 0.0006059638108126819, "timestamp": "2025-09-10 02:29:38.675202", "step": 3558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.710990", "step": 3558, "epoch": 2 }, { "type": "loss", "content": 0.0036140538286417723, "timestamp": "2025-09-10 02:29:38.712762", "step": 3559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.741920", "step": 3559, "epoch": 2 }, { "type": "loss", "content": 0.006955344695597887, "timestamp": "2025-09-10 02:29:38.765435", "step": 3560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:38.795028", "step": 3560, "epoch": 2 }, { "type": "loss", "content": 0.007602483965456486, "timestamp": "2025-09-10 02:29:38.796848", "step": 3561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.826087", "step": 3561, "epoch": 2 }, { "type": "loss", "content": 0.014284110628068447, "timestamp": "2025-09-10 02:29:38.827781", "step": 3562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.856812", "step": 3562, "epoch": 2 }, { "type": "loss", "content": 0.0003184565284755081, "timestamp": "2025-09-10 02:29:38.858825", "step": 3563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.887686", "step": 3563, "epoch": 2 }, { "type": "loss", "content": 0.025357427075505257, "timestamp": "2025-09-10 02:29:38.911382", "step": 3564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:38.940598", "step": 3564, "epoch": 2 }, { "type": "loss", "content": 0.022137897089123726, "timestamp": "2025-09-10 02:29:38.942670", "step": 3565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:38.971849", "step": 3565, "epoch": 2 }, { "type": "loss", "content": 0.0037835354451090097, "timestamp": "2025-09-10 02:29:38.973670", "step": 3566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.003703", "step": 3566, "epoch": 2 }, { "type": "loss", "content": 0.027557658031582832, "timestamp": "2025-09-10 02:29:39.005622", "step": 3567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.034227", "step": 3567, "epoch": 2 }, { "type": "loss", "content": 0.0037048538215458393, "timestamp": "2025-09-10 02:29:39.057920", "step": 3568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.086916", "step": 3568, "epoch": 2 }, { "type": "loss", "content": 0.025103973224759102, "timestamp": "2025-09-10 02:29:39.088898", "step": 3569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.117819", "step": 3569, "epoch": 2 }, { "type": "loss", "content": 0.0010510541032999754, "timestamp": "2025-09-10 02:29:39.119763", "step": 3570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.148279", "step": 3570, "epoch": 2 }, { "type": "loss", "content": 0.006197761744260788, "timestamp": "2025-09-10 02:29:39.150163", "step": 3571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.180052", "step": 3571, "epoch": 2 }, { "type": "loss", "content": 0.005343415774405003, "timestamp": "2025-09-10 02:29:39.203396", "step": 3572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:39.235151", "step": 3572, "epoch": 2 }, { "type": "loss", "content": 0.0004703709564637393, "timestamp": "2025-09-10 02:29:39.236903", "step": 3573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.269100", "step": 3573, "epoch": 2 }, { "type": "loss", "content": 0.03291929513216019, "timestamp": "2025-09-10 02:29:39.271075", "step": 3574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.299961", "step": 3574, "epoch": 2 }, { "type": "loss", "content": 0.015463131479918957, "timestamp": "2025-09-10 02:29:39.301829", "step": 3575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.330732", "step": 3575, "epoch": 2 }, { "type": "loss", "content": 0.013998471200466156, "timestamp": "2025-09-10 02:29:39.354382", "step": 3576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.390111", "step": 3576, "epoch": 2 }, { "type": "loss", "content": 0.00035301089519634843, "timestamp": "2025-09-10 02:29:39.391825", "step": 3577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.421640", "step": 3577, "epoch": 2 }, { "type": "loss", "content": 0.0246095210313797, "timestamp": "2025-09-10 02:29:39.423527", "step": 3578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.452902", "step": 3578, "epoch": 2 }, { "type": "loss", "content": 0.0017538743559271097, "timestamp": "2025-09-10 02:29:39.454921", "step": 3579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.484002", "step": 3579, "epoch": 2 }, { "type": "loss", "content": 0.0009338590898551047, "timestamp": "2025-09-10 02:29:39.507483", "step": 3580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.537578", "step": 3580, "epoch": 2 }, { "type": "loss", "content": 0.00333792925812304, "timestamp": "2025-09-10 02:29:39.539396", "step": 3581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.568248", "step": 3581, "epoch": 2 }, { "type": "loss", "content": 0.019255487248301506, "timestamp": "2025-09-10 02:29:39.570346", "step": 3582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.599487", "step": 3582, "epoch": 2 }, { "type": "loss", "content": 0.031330667436122894, "timestamp": "2025-09-10 02:29:39.601413", "step": 3583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.630429", "step": 3583, "epoch": 2 }, { "type": "loss", "content": 0.00597680127248168, "timestamp": "2025-09-10 02:29:39.653885", "step": 3584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.682516", "step": 3584, "epoch": 2 }, { "type": "loss", "content": 0.0011178290005773306, "timestamp": "2025-09-10 02:29:39.684541", "step": 3585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.713080", "step": 3585, "epoch": 2 }, { "type": "loss", "content": 0.0012423843145370483, "timestamp": "2025-09-10 02:29:39.715040", "step": 3586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.743530", "step": 3586, "epoch": 2 }, { "type": "loss", "content": 0.0043282704427838326, "timestamp": "2025-09-10 02:29:39.745362", "step": 3587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.774411", "step": 3587, "epoch": 2 }, { "type": "loss", "content": 0.0014382840599864721, "timestamp": "2025-09-10 02:29:39.797974", "step": 3588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.826950", "step": 3588, "epoch": 2 }, { "type": "loss", "content": 0.022662704810500145, "timestamp": "2025-09-10 02:29:39.828788", "step": 3589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.857718", "step": 3589, "epoch": 2 }, { "type": "loss", "content": 0.0030301802325993776, "timestamp": "2025-09-10 02:29:39.859406", "step": 3590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.888396", "step": 3590, "epoch": 2 }, { "type": "loss", "content": 0.0027410441543906927, "timestamp": "2025-09-10 02:29:39.890143", "step": 3591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.919870", "step": 3591, "epoch": 2 }, { "type": "loss", "content": 0.0033905827440321445, "timestamp": "2025-09-10 02:29:39.943159", "step": 3592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:39.972171", "step": 3592, "epoch": 2 }, { "type": "loss", "content": 0.028375597670674324, "timestamp": "2025-09-10 02:29:39.974105", "step": 3593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.004486", "step": 3593, "epoch": 2 }, { "type": "loss", "content": 0.029348796233534813, "timestamp": "2025-09-10 02:29:40.006320", "step": 3594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:40.035364", "step": 3594, "epoch": 2 }, { "type": "loss", "content": 0.0023193187080323696, "timestamp": "2025-09-10 02:29:40.037161", "step": 3595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:40.065532", "step": 3595, "epoch": 2 }, { "type": "loss", "content": 0.003464184468612075, "timestamp": "2025-09-10 02:29:40.089073", "step": 3596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.118192", "step": 3596, "epoch": 2 }, { "type": "loss", "content": 0.06731926649808884, "timestamp": "2025-09-10 02:29:40.120299", "step": 3597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.149152", "step": 3597, "epoch": 2 }, { "type": "loss", "content": 0.0005612451932393014, "timestamp": "2025-09-10 02:29:40.155596", "step": 3598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.183926", "step": 3598, "epoch": 2 }, { "type": "loss", "content": 0.0018065435579046607, "timestamp": "2025-09-10 02:29:40.185954", "step": 3599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.214489", "step": 3599, "epoch": 2 }, { "type": "loss", "content": 0.001333405263721943, "timestamp": "2025-09-10 02:29:40.237912", "step": 3600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:40.266931", "step": 3600, "epoch": 2 }, { "type": "loss", "content": 0.0012239479692652822, "timestamp": "2025-09-10 02:29:40.268947", "step": 3601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.297507", "step": 3601, "epoch": 2 }, { "type": "loss", "content": 0.004321325104683638, "timestamp": "2025-09-10 02:29:40.299658", "step": 3602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:40.328340", "step": 3602, "epoch": 2 }, { "type": "loss", "content": 0.003785841166973114, "timestamp": "2025-09-10 02:29:40.330270", "step": 3603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.359365", "step": 3603, "epoch": 2 }, { "type": "loss", "content": 0.015127211809158325, "timestamp": "2025-09-10 02:29:40.382795", "step": 3604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:40.411306", "step": 3604, "epoch": 2 }, { "type": "loss", "content": 0.002256478648632765, "timestamp": "2025-09-10 02:29:40.413114", "step": 3605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.441854", "step": 3605, "epoch": 2 }, { "type": "loss", "content": 0.00017908155859913677, "timestamp": "2025-09-10 02:29:40.443763", "step": 3606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.473100", "step": 3606, "epoch": 2 }, { "type": "loss", "content": 0.0023490507155656815, "timestamp": "2025-09-10 02:29:40.474963", "step": 3607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.503398", "step": 3607, "epoch": 2 }, { "type": "loss", "content": 0.028720121830701828, "timestamp": "2025-09-10 02:29:40.526927", "step": 3608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.563503", "step": 3608, "epoch": 2 }, { "type": "loss", "content": 0.0009512993274256587, "timestamp": "2025-09-10 02:29:40.565294", "step": 3609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.594105", "step": 3609, "epoch": 2 }, { "type": "loss", "content": 0.001343045150861144, "timestamp": "2025-09-10 02:29:40.595963", "step": 3610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.631908", "step": 3610, "epoch": 2 }, { "type": "loss", "content": 0.00167557701934129, "timestamp": "2025-09-10 02:29:40.634504", "step": 3611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.663199", "step": 3611, "epoch": 2 }, { "type": "loss", "content": 0.0007090084836818278, "timestamp": "2025-09-10 02:29:40.686606", "step": 3612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.716893", "step": 3612, "epoch": 2 }, { "type": "loss", "content": 0.00794034544378519, "timestamp": "2025-09-10 02:29:40.718910", "step": 3613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:40.754810", "step": 3613, "epoch": 2 }, { "type": "loss", "content": 0.0007401722832582891, "timestamp": "2025-09-10 02:29:40.757011", "step": 3614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:40.792395", "step": 3614, "epoch": 2 }, { "type": "loss", "content": 0.035306137055158615, "timestamp": "2025-09-10 02:29:40.794412", "step": 3615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.825297", "step": 3615, "epoch": 2 }, { "type": "loss", "content": 0.02245972864329815, "timestamp": "2025-09-10 02:29:40.849039", "step": 3616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.881486", "step": 3616, "epoch": 2 }, { "type": "loss", "content": 0.0003232559538446367, "timestamp": "2025-09-10 02:29:40.883871", "step": 3617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.914878", "step": 3617, "epoch": 2 }, { "type": "loss", "content": 0.020015936344861984, "timestamp": "2025-09-10 02:29:40.921512", "step": 3618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.954943", "step": 3618, "epoch": 2 }, { "type": "loss", "content": 0.002817420056089759, "timestamp": "2025-09-10 02:29:40.961041", "step": 3619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:40.993002", "step": 3619, "epoch": 2 }, { "type": "loss", "content": 0.043967586010694504, "timestamp": "2025-09-10 02:29:41.018849", "step": 3620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.049809", "step": 3620, "epoch": 2 }, { "type": "loss", "content": 0.0570630244910717, "timestamp": "2025-09-10 02:29:41.052786", "step": 3621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.081978", "step": 3621, "epoch": 2 }, { "type": "loss", "content": 0.004470278043299913, "timestamp": "2025-09-10 02:29:41.084857", "step": 3622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.114270", "step": 3622, "epoch": 2 }, { "type": "loss", "content": 0.014231848530471325, "timestamp": "2025-09-10 02:29:41.116302", "step": 3623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.146252", "step": 3623, "epoch": 2 }, { "type": "loss", "content": 0.0008056826191022992, "timestamp": "2025-09-10 02:29:41.169635", "step": 3624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.201255", "step": 3624, "epoch": 2 }, { "type": "loss", "content": 0.00075248145731166, "timestamp": "2025-09-10 02:29:41.203188", "step": 3625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.232301", "step": 3625, "epoch": 2 }, { "type": "loss", "content": 0.0004431054985616356, "timestamp": "2025-09-10 02:29:41.234341", "step": 3626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.266107", "step": 3626, "epoch": 2 }, { "type": "loss", "content": 0.057048819959163666, "timestamp": "2025-09-10 02:29:41.270080", "step": 3627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.301025", "step": 3627, "epoch": 2 }, { "type": "loss", "content": 0.0006368341855704784, "timestamp": "2025-09-10 02:29:41.326998", "step": 3628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.361272", "step": 3628, "epoch": 2 }, { "type": "loss", "content": 0.01359105296432972, "timestamp": "2025-09-10 02:29:41.367522", "step": 3629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:41.397693", "step": 3629, "epoch": 2 }, { "type": "loss", "content": 0.009940946474671364, "timestamp": "2025-09-10 02:29:41.399775", "step": 3630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:41.430334", "step": 3630, "epoch": 2 }, { "type": "loss", "content": 0.01117002870887518, "timestamp": "2025-09-10 02:29:41.432447", "step": 3631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.465814", "step": 3631, "epoch": 2 }, { "type": "loss", "content": 0.04185846075415611, "timestamp": "2025-09-10 02:29:41.489246", "step": 3632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.519393", "step": 3632, "epoch": 2 }, { "type": "loss", "content": 0.007980273105204105, "timestamp": "2025-09-10 02:29:41.521333", "step": 3633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:41.550420", "step": 3633, "epoch": 2 }, { "type": "loss", "content": 0.0005918587557971478, "timestamp": "2025-09-10 02:29:41.555172", "step": 3634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.590700", "step": 3634, "epoch": 2 }, { "type": "loss", "content": 0.0007793071563355625, "timestamp": "2025-09-10 02:29:41.592700", "step": 3635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.621567", "step": 3635, "epoch": 2 }, { "type": "loss", "content": 0.004654579795897007, "timestamp": "2025-09-10 02:29:41.644982", "step": 3636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.674048", "step": 3636, "epoch": 2 }, { "type": "loss", "content": 0.018719719722867012, "timestamp": "2025-09-10 02:29:41.675911", "step": 3637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.706175", "step": 3637, "epoch": 2 }, { "type": "loss", "content": 0.007895631715655327, "timestamp": "2025-09-10 02:29:41.708048", "step": 3638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.736802", "step": 3638, "epoch": 2 }, { "type": "loss", "content": 0.0034556484315544367, "timestamp": "2025-09-10 02:29:41.738838", "step": 3639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.767304", "step": 3639, "epoch": 2 }, { "type": "loss", "content": 0.001310615218244493, "timestamp": "2025-09-10 02:29:41.791303", "step": 3640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.820525", "step": 3640, "epoch": 2 }, { "type": "loss", "content": 0.007575228810310364, "timestamp": "2025-09-10 02:29:41.822780", "step": 3641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.854705", "step": 3641, "epoch": 2 }, { "type": "loss", "content": 0.0037589308340102434, "timestamp": "2025-09-10 02:29:41.858482", "step": 3642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:41.889651", "step": 3642, "epoch": 2 }, { "type": "loss", "content": 0.0017815810861065984, "timestamp": "2025-09-10 02:29:41.891498", "step": 3643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.920561", "step": 3643, "epoch": 2 }, { "type": "loss", "content": 0.003863951889798045, "timestamp": "2025-09-10 02:29:41.944394", "step": 3644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:41.973264", "step": 3644, "epoch": 2 }, { "type": "loss", "content": 0.000795595406088978, "timestamp": "2025-09-10 02:29:41.975090", "step": 3645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:42.003616", "step": 3645, "epoch": 2 }, { "type": "loss", "content": 0.01848900318145752, "timestamp": "2025-09-10 02:29:42.006599", "step": 3646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:42.039592", "step": 3646, "epoch": 2 }, { "type": "loss", "content": 0.011643946170806885, "timestamp": "2025-09-10 02:29:42.045962", "step": 3647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:42.077836", "step": 3647, "epoch": 2 }, { "type": "loss", "content": 0.002694113878533244, "timestamp": "2025-09-10 02:29:42.101136", "step": 3648, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:29:44.126698", "step": 3648, "epoch": 2 }, { "type": "pplx", "content": 2284447.3901765565, "timestamp": "2025-09-10 02:29:44.128854", "step": 3648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.156395", "step": 3648, "epoch": 2 }, { "type": "loss", "content": 0.027025112882256508, "timestamp": "2025-09-10 02:29:44.161102", "step": 3649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.191047", "step": 3649, "epoch": 2 }, { "type": "loss", "content": 0.004025637172162533, "timestamp": "2025-09-10 02:29:44.193140", "step": 3650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.222591", "step": 3650, "epoch": 2 }, { "type": "loss", "content": 0.0014656836865469813, "timestamp": "2025-09-10 02:29:44.224564", "step": 3651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.253352", "step": 3651, "epoch": 2 }, { "type": "loss", "content": 0.003369029611349106, "timestamp": "2025-09-10 02:29:44.277028", "step": 3652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.306602", "step": 3652, "epoch": 2 }, { "type": "loss", "content": 0.002173537854105234, "timestamp": "2025-09-10 02:29:44.308301", "step": 3653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.337315", "step": 3653, "epoch": 2 }, { "type": "loss", "content": 0.033811409026384354, "timestamp": "2025-09-10 02:29:44.339435", "step": 3654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:44.368581", "step": 3654, "epoch": 2 }, { "type": "loss", "content": 0.006262099836021662, "timestamp": "2025-09-10 02:29:44.370495", "step": 3655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.399621", "step": 3655, "epoch": 2 }, { "type": "loss", "content": 0.007304125931113958, "timestamp": "2025-09-10 02:29:44.422966", "step": 3656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.452397", "step": 3656, "epoch": 2 }, { "type": "loss", "content": 0.004982686601579189, "timestamp": "2025-09-10 02:29:44.454264", "step": 3657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.483863", "step": 3657, "epoch": 2 }, { "type": "loss", "content": 0.0073280492797493935, "timestamp": "2025-09-10 02:29:44.487318", "step": 3658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.517012", "step": 3658, "epoch": 2 }, { "type": "loss", "content": 0.0029368854593485594, "timestamp": "2025-09-10 02:29:44.519002", "step": 3659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:44.549468", "step": 3659, "epoch": 2 }, { "type": "loss", "content": 0.0009214190649800003, "timestamp": "2025-09-10 02:29:44.573088", "step": 3660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.603013", "step": 3660, "epoch": 2 }, { "type": "loss", "content": 0.01310895849019289, "timestamp": "2025-09-10 02:29:44.604984", "step": 3661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.634038", "step": 3661, "epoch": 2 }, { "type": "loss", "content": 0.0038477382622659206, "timestamp": "2025-09-10 02:29:44.635836", "step": 3662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.664723", "step": 3662, "epoch": 2 }, { "type": "loss", "content": 0.017818301916122437, "timestamp": "2025-09-10 02:29:44.667211", "step": 3663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.696327", "step": 3663, "epoch": 2 }, { "type": "loss", "content": 0.0033985336776822805, "timestamp": "2025-09-10 02:29:44.725305", "step": 3664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.755101", "step": 3664, "epoch": 2 }, { "type": "loss", "content": 0.015548835508525372, "timestamp": "2025-09-10 02:29:44.756957", "step": 3665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.785922", "step": 3665, "epoch": 2 }, { "type": "loss", "content": 0.0015692234737798572, "timestamp": "2025-09-10 02:29:44.788037", "step": 3666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.817352", "step": 3666, "epoch": 2 }, { "type": "loss", "content": 0.026195894926786423, "timestamp": "2025-09-10 02:29:44.819192", "step": 3667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.848545", "step": 3667, "epoch": 2 }, { "type": "loss", "content": 0.015752393752336502, "timestamp": "2025-09-10 02:29:44.871929", "step": 3668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:44.901285", "step": 3668, "epoch": 2 }, { "type": "loss", "content": 0.03612801805138588, "timestamp": "2025-09-10 02:29:44.903304", "step": 3669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.932536", "step": 3669, "epoch": 2 }, { "type": "loss", "content": 0.00046271312749013305, "timestamp": "2025-09-10 02:29:44.934368", "step": 3670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.963490", "step": 3670, "epoch": 2 }, { "type": "loss", "content": 0.002088590059429407, "timestamp": "2025-09-10 02:29:44.965588", "step": 3671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:44.994597", "step": 3671, "epoch": 2 }, { "type": "loss", "content": 0.0005249869427643716, "timestamp": "2025-09-10 02:29:45.017756", "step": 3672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:45.046660", "step": 3672, "epoch": 2 }, { "type": "loss", "content": 0.03901727870106697, "timestamp": "2025-09-10 02:29:45.048378", "step": 3673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.077584", "step": 3673, "epoch": 2 }, { "type": "loss", "content": 0.008338806219398975, "timestamp": "2025-09-10 02:29:45.079415", "step": 3674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:45.108870", "step": 3674, "epoch": 2 }, { "type": "loss", "content": 0.0008816872723400593, "timestamp": "2025-09-10 02:29:45.110647", "step": 3675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.139433", "step": 3675, "epoch": 2 }, { "type": "loss", "content": 0.024514347314834595, "timestamp": "2025-09-10 02:29:45.162601", "step": 3676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.191212", "step": 3676, "epoch": 2 }, { "type": "loss", "content": 0.009860238060355186, "timestamp": "2025-09-10 02:29:45.192807", "step": 3677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.221868", "step": 3677, "epoch": 2 }, { "type": "loss", "content": 0.04991964250802994, "timestamp": "2025-09-10 02:29:45.224017", "step": 3678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.252973", "step": 3678, "epoch": 2 }, { "type": "loss", "content": 0.02714690938591957, "timestamp": "2025-09-10 02:29:45.255099", "step": 3679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.284237", "step": 3679, "epoch": 2 }, { "type": "loss", "content": 0.003578531090170145, "timestamp": "2025-09-10 02:29:45.307659", "step": 3680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.337122", "step": 3680, "epoch": 2 }, { "type": "loss", "content": 0.022830527275800705, "timestamp": "2025-09-10 02:29:45.339126", "step": 3681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.368573", "step": 3681, "epoch": 2 }, { "type": "loss", "content": 0.03104826994240284, "timestamp": "2025-09-10 02:29:45.370416", "step": 3682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:45.400070", "step": 3682, "epoch": 2 }, { "type": "loss", "content": 0.004579231142997742, "timestamp": "2025-09-10 02:29:45.401966", "step": 3683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:45.430778", "step": 3683, "epoch": 2 }, { "type": "loss", "content": 0.004558298271149397, "timestamp": "2025-09-10 02:29:45.454081", "step": 3684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:45.483034", "step": 3684, "epoch": 2 }, { "type": "loss", "content": 0.011908398941159248, "timestamp": "2025-09-10 02:29:45.484786", "step": 3685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.513470", "step": 3685, "epoch": 2 }, { "type": "loss", "content": 0.015865741297602654, "timestamp": "2025-09-10 02:29:45.515192", "step": 3686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.544192", "step": 3686, "epoch": 2 }, { "type": "loss", "content": 0.009198537096381187, "timestamp": "2025-09-10 02:29:45.546231", "step": 3687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.576016", "step": 3687, "epoch": 2 }, { "type": "loss", "content": 0.030312685295939445, "timestamp": "2025-09-10 02:29:45.599341", "step": 3688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.628710", "step": 3688, "epoch": 2 }, { "type": "loss", "content": 0.0030524202156811953, "timestamp": "2025-09-10 02:29:45.630726", "step": 3689, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.659673", "step": 3689, "epoch": 2 }, { "type": "loss", "content": 0.009920709766447544, "timestamp": "2025-09-10 02:29:45.661520", "step": 3690, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.690247", "step": 3690, "epoch": 2 }, { "type": "loss", "content": 0.0010171084431931376, "timestamp": "2025-09-10 02:29:45.692245", "step": 3691, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.723403", "step": 3691, "epoch": 2 }, { "type": "loss", "content": 0.02838117443025112, "timestamp": "2025-09-10 02:29:45.746811", "step": 3692, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.776143", "step": 3692, "epoch": 2 }, { "type": "loss", "content": 0.05404944345355034, "timestamp": "2025-09-10 02:29:45.778399", "step": 3693, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.807484", "step": 3693, "epoch": 2 }, { "type": "loss", "content": 0.0054108272306621075, "timestamp": "2025-09-10 02:29:45.809633", "step": 3694, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.838904", "step": 3694, "epoch": 2 }, { "type": "loss", "content": 0.005463256500661373, "timestamp": "2025-09-10 02:29:45.840502", "step": 3695, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.869899", "step": 3695, "epoch": 2 }, { "type": "loss", "content": 0.001773970085196197, "timestamp": "2025-09-10 02:29:45.893082", "step": 3696, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.922866", "step": 3696, "epoch": 2 }, { "type": "loss", "content": 0.006801222451031208, "timestamp": "2025-09-10 02:29:45.924539", "step": 3697, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:45.953640", "step": 3697, "epoch": 2 }, { "type": "loss", "content": 0.0006268368451856077, "timestamp": "2025-09-10 02:29:45.955530", "step": 3698, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:45.984873", "step": 3698, "epoch": 2 }, { "type": "loss", "content": 0.006464776117354631, "timestamp": "2025-09-10 02:29:45.986761", "step": 3699, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.015926", "step": 3699, "epoch": 2 }, { "type": "loss", "content": 0.026705535128712654, "timestamp": "2025-09-10 02:29:46.039313", "step": 3700, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.068635", "step": 3700, "epoch": 2 }, { "type": "loss", "content": 0.0059391348622739315, "timestamp": "2025-09-10 02:29:46.070582", "step": 3701, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.099394", "step": 3701, "epoch": 2 }, { "type": "loss", "content": 0.06555979698896408, "timestamp": "2025-09-10 02:29:46.101382", "step": 3702, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:46.130917", "step": 3702, "epoch": 2 }, { "type": "loss", "content": 0.00795739609748125, "timestamp": "2025-09-10 02:29:46.132751", "step": 3703, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.161358", "step": 3703, "epoch": 2 }, { "type": "loss", "content": 0.00283786840736866, "timestamp": "2025-09-10 02:29:46.184685", "step": 3704, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.213974", "step": 3704, "epoch": 2 }, { "type": "loss", "content": 0.0019304228480905294, "timestamp": "2025-09-10 02:29:46.215845", "step": 3705, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.244444", "step": 3705, "epoch": 2 }, { "type": "loss", "content": 0.014217148534953594, "timestamp": "2025-09-10 02:29:46.246225", "step": 3706, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.275060", "step": 3706, "epoch": 2 }, { "type": "loss", "content": 0.0016484770458191633, "timestamp": "2025-09-10 02:29:46.276972", "step": 3707, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.305795", "step": 3707, "epoch": 2 }, { "type": "loss", "content": 0.00168976082932204, "timestamp": "2025-09-10 02:29:46.329297", "step": 3708, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.358028", "step": 3708, "epoch": 2 }, { "type": "loss", "content": 0.00806464534252882, "timestamp": "2025-09-10 02:29:46.359778", "step": 3709, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.388534", "step": 3709, "epoch": 2 }, { "type": "loss", "content": 0.0006172872963361442, "timestamp": "2025-09-10 02:29:46.390313", "step": 3710, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:46.419396", "step": 3710, "epoch": 2 }, { "type": "loss", "content": 0.0014022333780303597, "timestamp": "2025-09-10 02:29:46.421375", "step": 3711, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.450540", "step": 3711, "epoch": 2 }, { "type": "loss", "content": 0.000817774620372802, "timestamp": "2025-09-10 02:29:46.473868", "step": 3712, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.502406", "step": 3712, "epoch": 2 }, { "type": "loss", "content": 0.0027491210494190454, "timestamp": "2025-09-10 02:29:46.504433", "step": 3713, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.533182", "step": 3713, "epoch": 2 }, { "type": "loss", "content": 0.017350925132632256, "timestamp": "2025-09-10 02:29:46.535052", "step": 3714, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.563994", "step": 3714, "epoch": 2 }, { "type": "loss", "content": 0.0004906453541480005, "timestamp": "2025-09-10 02:29:46.565820", "step": 3715, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.594812", "step": 3715, "epoch": 2 }, { "type": "loss", "content": 0.00037716422230005264, "timestamp": "2025-09-10 02:29:46.618317", "step": 3716, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:46.648114", "step": 3716, "epoch": 2 }, { "type": "loss", "content": 0.0031613376922905445, "timestamp": "2025-09-10 02:29:46.649895", "step": 3717, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.679342", "step": 3717, "epoch": 2 }, { "type": "loss", "content": 0.0023560025729238987, "timestamp": "2025-09-10 02:29:46.681151", "step": 3718, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:46.715004", "step": 3718, "epoch": 2 }, { "type": "loss", "content": 0.029003635048866272, "timestamp": "2025-09-10 02:29:46.716845", "step": 3719, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:46.746208", "step": 3719, "epoch": 2 }, { "type": "loss", "content": 0.002591664670035243, "timestamp": "2025-09-10 02:29:46.769401", "step": 3720, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.798228", "step": 3720, "epoch": 2 }, { "type": "loss", "content": 0.0003384334850125015, "timestamp": "2025-09-10 02:29:46.800280", "step": 3721, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.830554", "step": 3721, "epoch": 2 }, { "type": "loss", "content": 0.0429510697722435, "timestamp": "2025-09-10 02:29:46.832684", "step": 3722, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.861862", "step": 3722, "epoch": 2 }, { "type": "loss", "content": 0.006672355346381664, "timestamp": "2025-09-10 02:29:46.865037", "step": 3723, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.894006", "step": 3723, "epoch": 2 }, { "type": "loss", "content": 0.0007637891103513539, "timestamp": "2025-09-10 02:29:46.917379", "step": 3724, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:46.946830", "step": 3724, "epoch": 2 }, { "type": "loss", "content": 0.026889193803071976, "timestamp": "2025-09-10 02:29:46.950223", "step": 3725, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:46.980827", "step": 3725, "epoch": 2 }, { "type": "loss", "content": 0.019164739176630974, "timestamp": "2025-09-10 02:29:46.987669", "step": 3726, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:47.020094", "step": 3726, "epoch": 2 }, { "type": "loss", "content": 0.03033958561718464, "timestamp": "2025-09-10 02:29:47.022049", "step": 3727, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.051301", "step": 3727, "epoch": 2 }, { "type": "loss", "content": 0.023309703916311264, "timestamp": "2025-09-10 02:29:47.075376", "step": 3728, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.104579", "step": 3728, "epoch": 2 }, { "type": "loss", "content": 0.001016931259073317, "timestamp": "2025-09-10 02:29:47.106244", "step": 3729, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:47.135573", "step": 3729, "epoch": 2 }, { "type": "loss", "content": 0.04949323460459709, "timestamp": "2025-09-10 02:29:47.137516", "step": 3730, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.166935", "step": 3730, "epoch": 2 }, { "type": "loss", "content": 0.007128946948796511, "timestamp": "2025-09-10 02:29:47.168732", "step": 3731, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:47.197652", "step": 3731, "epoch": 2 }, { "type": "loss", "content": 0.000753446773160249, "timestamp": "2025-09-10 02:29:47.220806", "step": 3732, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.250155", "step": 3732, "epoch": 2 }, { "type": "loss", "content": 0.004715205170214176, "timestamp": "2025-09-10 02:29:47.251971", "step": 3733, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:47.281394", "step": 3733, "epoch": 2 }, { "type": "loss", "content": 0.0013004514621570706, "timestamp": "2025-09-10 02:29:47.283421", "step": 3734, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.312476", "step": 3734, "epoch": 2 }, { "type": "loss", "content": 0.001139021129347384, "timestamp": "2025-09-10 02:29:47.314290", "step": 3735, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:47.343534", "step": 3735, "epoch": 2 }, { "type": "loss", "content": 0.00520856911316514, "timestamp": "2025-09-10 02:29:47.367138", "step": 3736, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.396947", "step": 3736, "epoch": 2 }, { "type": "loss", "content": 0.002791530219838023, "timestamp": "2025-09-10 02:29:47.398787", "step": 3737, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.427538", "step": 3737, "epoch": 2 }, { "type": "loss", "content": 0.0020063193514943123, "timestamp": "2025-09-10 02:29:47.429388", "step": 3738, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.458038", "step": 3738, "epoch": 2 }, { "type": "loss", "content": 0.0008909571333788335, "timestamp": "2025-09-10 02:29:47.459776", "step": 3739, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:47.488647", "step": 3739, "epoch": 2 }, { "type": "loss", "content": 0.0017103239661082625, "timestamp": "2025-09-10 02:29:47.511715", "step": 3740, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.540914", "step": 3740, "epoch": 2 }, { "type": "loss", "content": 0.0011297070886939764, "timestamp": "2025-09-10 02:29:47.542543", "step": 3741, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.571826", "step": 3741, "epoch": 2 }, { "type": "loss", "content": 0.0004920273786410689, "timestamp": "2025-09-10 02:29:47.573348", "step": 3742, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.602000", "step": 3742, "epoch": 2 }, { "type": "loss", "content": 0.0008650109521113336, "timestamp": "2025-09-10 02:29:47.603784", "step": 3743, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:47.632395", "step": 3743, "epoch": 2 }, { "type": "loss", "content": 0.0024125610943883657, "timestamp": "2025-09-10 02:29:47.655904", "step": 3744, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:47.685282", "step": 3744, "epoch": 2 }, { "type": "loss", "content": 0.02579103223979473, "timestamp": "2025-09-10 02:29:47.687104", "step": 3745, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.715859", "step": 3745, "epoch": 2 }, { "type": "loss", "content": 0.06585411727428436, "timestamp": "2025-09-10 02:29:47.717426", "step": 3746, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.746867", "step": 3746, "epoch": 2 }, { "type": "loss", "content": 0.0029520683456212282, "timestamp": "2025-09-10 02:29:47.748725", "step": 3747, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:47.777420", "step": 3747, "epoch": 2 }, { "type": "loss", "content": 0.001344243180938065, "timestamp": "2025-09-10 02:29:47.800675", "step": 3748, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:47.829835", "step": 3748, "epoch": 2 }, { "type": "loss", "content": 0.000721413642168045, "timestamp": "2025-09-10 02:29:47.831588", "step": 3749, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.860353", "step": 3749, "epoch": 2 }, { "type": "loss", "content": 0.0006202346412464976, "timestamp": "2025-09-10 02:29:47.862153", "step": 3750, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.890971", "step": 3750, "epoch": 2 }, { "type": "loss", "content": 0.0008002677350305021, "timestamp": "2025-09-10 02:29:47.892800", "step": 3751, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:47.921734", "step": 3751, "epoch": 2 }, { "type": "loss", "content": 0.005500978324562311, "timestamp": "2025-09-10 02:29:47.944941", "step": 3752, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:47.974037", "step": 3752, "epoch": 2 }, { "type": "loss", "content": 0.00033267633989453316, "timestamp": "2025-09-10 02:29:47.975658", "step": 3753, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.004315", "step": 3753, "epoch": 2 }, { "type": "loss", "content": 0.00047051571891643107, "timestamp": "2025-09-10 02:29:48.006069", "step": 3754, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:48.034864", "step": 3754, "epoch": 2 }, { "type": "loss", "content": 0.0005520916311070323, "timestamp": "2025-09-10 02:29:48.036639", "step": 3755, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.065164", "step": 3755, "epoch": 2 }, { "type": "loss", "content": 0.03412267565727234, "timestamp": "2025-09-10 02:29:48.088585", "step": 3756, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.117569", "step": 3756, "epoch": 2 }, { "type": "loss", "content": 0.02543460950255394, "timestamp": "2025-09-10 02:29:48.119497", "step": 3757, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.147836", "step": 3757, "epoch": 2 }, { "type": "loss", "content": 0.001249514170922339, "timestamp": "2025-09-10 02:29:48.149774", "step": 3758, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:48.178619", "step": 3758, "epoch": 2 }, { "type": "loss", "content": 0.04360520467162132, "timestamp": "2025-09-10 02:29:48.180380", "step": 3759, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.209597", "step": 3759, "epoch": 2 }, { "type": "loss", "content": 0.0005400353693403304, "timestamp": "2025-09-10 02:29:48.232936", "step": 3760, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.261646", "step": 3760, "epoch": 2 }, { "type": "loss", "content": 0.0015497934073209763, "timestamp": "2025-09-10 02:29:48.263209", "step": 3761, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.291925", "step": 3761, "epoch": 2 }, { "type": "loss", "content": 0.002198526868596673, "timestamp": "2025-09-10 02:29:48.294963", "step": 3762, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.326470", "step": 3762, "epoch": 2 }, { "type": "loss", "content": 0.0051878029480576515, "timestamp": "2025-09-10 02:29:48.328095", "step": 3763, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:48.357323", "step": 3763, "epoch": 2 }, { "type": "loss", "content": 0.0006835527601651847, "timestamp": "2025-09-10 02:29:48.380899", "step": 3764, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.409703", "step": 3764, "epoch": 2 }, { "type": "loss", "content": 0.01905216835439205, "timestamp": "2025-09-10 02:29:48.411954", "step": 3765, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.441324", "step": 3765, "epoch": 2 }, { "type": "loss", "content": 0.0037093120627105236, "timestamp": "2025-09-10 02:29:48.443656", "step": 3766, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.473422", "step": 3766, "epoch": 2 }, { "type": "loss", "content": 0.036767762154340744, "timestamp": "2025-09-10 02:29:48.475200", "step": 3767, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.504045", "step": 3767, "epoch": 2 }, { "type": "loss", "content": 0.014642768539488316, "timestamp": "2025-09-10 02:29:48.527431", "step": 3768, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.556578", "step": 3768, "epoch": 2 }, { "type": "loss", "content": 0.0021930208895355463, "timestamp": "2025-09-10 02:29:48.558464", "step": 3769, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:48.587071", "step": 3769, "epoch": 2 }, { "type": "loss", "content": 0.031093696132302284, "timestamp": "2025-09-10 02:29:48.588905", "step": 3770, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.618606", "step": 3770, "epoch": 2 }, { "type": "loss", "content": 0.008391105569899082, "timestamp": "2025-09-10 02:29:48.620555", "step": 3771, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.649543", "step": 3771, "epoch": 2 }, { "type": "loss", "content": 0.03555647283792496, "timestamp": "2025-09-10 02:29:48.672859", "step": 3772, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.701939", "step": 3772, "epoch": 2 }, { "type": "loss", "content": 0.003431815654039383, "timestamp": "2025-09-10 02:29:48.703736", "step": 3773, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:48.732755", "step": 3773, "epoch": 2 }, { "type": "loss", "content": 0.03433075174689293, "timestamp": "2025-09-10 02:29:48.734653", "step": 3774, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.763553", "step": 3774, "epoch": 2 }, { "type": "loss", "content": 0.03858838230371475, "timestamp": "2025-09-10 02:29:48.765501", "step": 3775, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:48.794600", "step": 3775, "epoch": 2 }, { "type": "loss", "content": 0.031171072274446487, "timestamp": "2025-09-10 02:29:48.818146", "step": 3776, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.847106", "step": 3776, "epoch": 2 }, { "type": "loss", "content": 0.04389452189207077, "timestamp": "2025-09-10 02:29:48.848909", "step": 3777, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.878098", "step": 3777, "epoch": 2 }, { "type": "loss", "content": 0.01032222993671894, "timestamp": "2025-09-10 02:29:48.879947", "step": 3778, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.908971", "step": 3778, "epoch": 2 }, { "type": "loss", "content": 0.017558833584189415, "timestamp": "2025-09-10 02:29:48.910581", "step": 3779, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:48.939384", "step": 3779, "epoch": 2 }, { "type": "loss", "content": 0.03611234202980995, "timestamp": "2025-09-10 02:29:48.962984", "step": 3780, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:48.992051", "step": 3780, "epoch": 2 }, { "type": "loss", "content": 0.004638135898858309, "timestamp": "2025-09-10 02:29:48.994019", "step": 3781, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.023396", "step": 3781, "epoch": 2 }, { "type": "loss", "content": 0.053978390991687775, "timestamp": "2025-09-10 02:29:49.025111", "step": 3782, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.054357", "step": 3782, "epoch": 2 }, { "type": "loss", "content": 0.036939799785614014, "timestamp": "2025-09-10 02:29:49.056198", "step": 3783, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.085199", "step": 3783, "epoch": 2 }, { "type": "loss", "content": 0.01262232568114996, "timestamp": "2025-09-10 02:29:49.108523", "step": 3784, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.138461", "step": 3784, "epoch": 2 }, { "type": "loss", "content": 0.021416574716567993, "timestamp": "2025-09-10 02:29:49.140405", "step": 3785, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:49.169704", "step": 3785, "epoch": 2 }, { "type": "loss", "content": 0.006002691574394703, "timestamp": "2025-09-10 02:29:49.171570", "step": 3786, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.200609", "step": 3786, "epoch": 2 }, { "type": "loss", "content": 0.004044627770781517, "timestamp": "2025-09-10 02:29:49.202384", "step": 3787, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.231207", "step": 3787, "epoch": 2 }, { "type": "loss", "content": 0.0032630611676722765, "timestamp": "2025-09-10 02:29:49.254621", "step": 3788, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:49.284155", "step": 3788, "epoch": 2 }, { "type": "loss", "content": 0.007752821780741215, "timestamp": "2025-09-10 02:29:49.285895", "step": 3789, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.316182", "step": 3789, "epoch": 2 }, { "type": "loss", "content": 0.028348324820399284, "timestamp": "2025-09-10 02:29:49.317871", "step": 3790, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.347564", "step": 3790, "epoch": 2 }, { "type": "loss", "content": 0.007973059080541134, "timestamp": "2025-09-10 02:29:49.349321", "step": 3791, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.379247", "step": 3791, "epoch": 2 }, { "type": "loss", "content": 0.027207011356949806, "timestamp": "2025-09-10 02:29:49.402673", "step": 3792, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:49.432200", "step": 3792, "epoch": 2 }, { "type": "loss", "content": 0.02067798376083374, "timestamp": "2025-09-10 02:29:49.434036", "step": 3793, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.464046", "step": 3793, "epoch": 2 }, { "type": "loss", "content": 0.006651004310697317, "timestamp": "2025-09-10 02:29:49.466064", "step": 3794, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.495037", "step": 3794, "epoch": 2 }, { "type": "loss", "content": 0.009209489449858665, "timestamp": "2025-09-10 02:29:49.496917", "step": 3795, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:49.526371", "step": 3795, "epoch": 2 }, { "type": "loss", "content": 0.001799335121177137, "timestamp": "2025-09-10 02:29:49.549681", "step": 3796, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:49.579020", "step": 3796, "epoch": 2 }, { "type": "loss", "content": 0.02988806553184986, "timestamp": "2025-09-10 02:29:49.580775", "step": 3797, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:29:49.609572", "step": 3797, "epoch": 2 }, { "type": "loss", "content": 0.008221580646932125, "timestamp": "2025-09-10 02:29:49.611411", "step": 3798, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:49.640373", "step": 3798, "epoch": 2 }, { "type": "loss", "content": 0.003645301563665271, "timestamp": "2025-09-10 02:29:49.642185", "step": 3799, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:49.670691", "step": 3799, "epoch": 2 }, { "type": "loss", "content": 0.0063503882847726345, "timestamp": "2025-09-10 02:29:49.695410", "step": 3800, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:29:51.634475", "step": 3800, "epoch": 2 }, { "type": "pplx", "content": 2281451.019019619, "timestamp": "2025-09-10 02:29:51.639175", "step": 3800, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:51.669669", "step": 3800, "epoch": 2 }, { "type": "loss", "content": 0.020798875018954277, "timestamp": "2025-09-10 02:29:51.671587", "step": 3801, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:51.700573", "step": 3801, "epoch": 2 }, { "type": "loss", "content": 0.017957177013158798, "timestamp": "2025-09-10 02:29:51.702548", "step": 3802, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:51.732205", "step": 3802, "epoch": 2 }, { "type": "loss", "content": 0.020725306123495102, "timestamp": "2025-09-10 02:29:51.736947", "step": 3803, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:51.765926", "step": 3803, "epoch": 2 }, { "type": "loss", "content": 0.02220289036631584, "timestamp": "2025-09-10 02:29:51.789537", "step": 3804, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:51.821550", "step": 3804, "epoch": 2 }, { "type": "loss", "content": 0.04601726308465004, "timestamp": "2025-09-10 02:29:51.823409", "step": 3805, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:51.860711", "step": 3805, "epoch": 2 }, { "type": "loss", "content": 0.01492367684841156, "timestamp": "2025-09-10 02:29:51.862598", "step": 3806, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:51.893518", "step": 3806, "epoch": 2 }, { "type": "loss", "content": 0.005222121719270945, "timestamp": "2025-09-10 02:29:51.895253", "step": 3807, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:51.924993", "step": 3807, "epoch": 2 }, { "type": "loss", "content": 0.025623932480812073, "timestamp": "2025-09-10 02:29:51.948394", "step": 3808, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:51.978212", "step": 3808, "epoch": 2 }, { "type": "loss", "content": 0.012954866513609886, "timestamp": "2025-09-10 02:29:51.980248", "step": 3809, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.010042", "step": 3809, "epoch": 2 }, { "type": "loss", "content": 0.0058203525841236115, "timestamp": "2025-09-10 02:29:52.011664", "step": 3810, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.042102", "step": 3810, "epoch": 2 }, { "type": "loss", "content": 0.006981327198445797, "timestamp": "2025-09-10 02:29:52.044042", "step": 3811, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.072747", "step": 3811, "epoch": 2 }, { "type": "loss", "content": 0.009414931759238243, "timestamp": "2025-09-10 02:29:52.096091", "step": 3812, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.124917", "step": 3812, "epoch": 2 }, { "type": "loss", "content": 0.007098652422428131, "timestamp": "2025-09-10 02:29:52.126778", "step": 3813, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.155518", "step": 3813, "epoch": 2 }, { "type": "loss", "content": 0.03589503839612007, "timestamp": "2025-09-10 02:29:52.159707", "step": 3814, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.190405", "step": 3814, "epoch": 2 }, { "type": "loss", "content": 0.006379269063472748, "timestamp": "2025-09-10 02:29:52.192913", "step": 3815, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:52.223713", "step": 3815, "epoch": 2 }, { "type": "loss", "content": 0.08531519025564194, "timestamp": "2025-09-10 02:29:52.247080", "step": 3816, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.278995", "step": 3816, "epoch": 2 }, { "type": "loss", "content": 0.06141041964292526, "timestamp": "2025-09-10 02:29:52.281020", "step": 3817, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.309986", "step": 3817, "epoch": 2 }, { "type": "loss", "content": 0.02239351160824299, "timestamp": "2025-09-10 02:29:52.312237", "step": 3818, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.344613", "step": 3818, "epoch": 2 }, { "type": "loss", "content": 0.03820383921265602, "timestamp": "2025-09-10 02:29:52.346603", "step": 3819, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.375886", "step": 3819, "epoch": 2 }, { "type": "loss", "content": 0.0030169629026204348, "timestamp": "2025-09-10 02:29:52.399026", "step": 3820, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.427572", "step": 3820, "epoch": 2 }, { "type": "loss", "content": 0.007027463521808386, "timestamp": "2025-09-10 02:29:52.429532", "step": 3821, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:52.458811", "step": 3821, "epoch": 2 }, { "type": "loss", "content": 0.009036152623593807, "timestamp": "2025-09-10 02:29:52.464291", "step": 3822, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.495049", "step": 3822, "epoch": 2 }, { "type": "loss", "content": 0.004128528293222189, "timestamp": "2025-09-10 02:29:52.498604", "step": 3823, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.529177", "step": 3823, "epoch": 2 }, { "type": "loss", "content": 0.021956181153655052, "timestamp": "2025-09-10 02:29:52.552670", "step": 3824, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.581734", "step": 3824, "epoch": 2 }, { "type": "loss", "content": 0.002516851993277669, "timestamp": "2025-09-10 02:29:52.583442", "step": 3825, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.612584", "step": 3825, "epoch": 2 }, { "type": "loss", "content": 0.006461195647716522, "timestamp": "2025-09-10 02:29:52.614319", "step": 3826, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.643135", "step": 3826, "epoch": 2 }, { "type": "loss", "content": 0.003559216856956482, "timestamp": "2025-09-10 02:29:52.646534", "step": 3827, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.675342", "step": 3827, "epoch": 2 }, { "type": "loss", "content": 0.014234405942261219, "timestamp": "2025-09-10 02:29:52.698562", "step": 3828, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.727507", "step": 3828, "epoch": 2 }, { "type": "loss", "content": 0.06210624426603317, "timestamp": "2025-09-10 02:29:52.729422", "step": 3829, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.758667", "step": 3829, "epoch": 2 }, { "type": "loss", "content": 0.001260876771993935, "timestamp": "2025-09-10 02:29:52.760594", "step": 3830, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.790223", "step": 3830, "epoch": 2 }, { "type": "loss", "content": 0.0018323047552257776, "timestamp": "2025-09-10 02:29:52.792172", "step": 3831, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:52.824074", "step": 3831, "epoch": 2 }, { "type": "loss", "content": 0.014975875616073608, "timestamp": "2025-09-10 02:29:52.850385", "step": 3832, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.879599", "step": 3832, "epoch": 2 }, { "type": "loss", "content": 0.0037122422363609076, "timestamp": "2025-09-10 02:29:52.881668", "step": 3833, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.910432", "step": 3833, "epoch": 2 }, { "type": "loss", "content": 0.004388042259961367, "timestamp": "2025-09-10 02:29:52.912216", "step": 3834, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.941108", "step": 3834, "epoch": 2 }, { "type": "loss", "content": 0.008131971582770348, "timestamp": "2025-09-10 02:29:52.942902", "step": 3835, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:52.972428", "step": 3835, "epoch": 2 }, { "type": "loss", "content": 0.0006102101178839803, "timestamp": "2025-09-10 02:29:52.996118", "step": 3836, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.025459", "step": 3836, "epoch": 2 }, { "type": "loss", "content": 0.0015979270683601499, "timestamp": "2025-09-10 02:29:53.027219", "step": 3837, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:53.056985", "step": 3837, "epoch": 2 }, { "type": "loss", "content": 0.008117801509797573, "timestamp": "2025-09-10 02:29:53.058793", "step": 3838, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.088699", "step": 3838, "epoch": 2 }, { "type": "loss", "content": 0.004349916707724333, "timestamp": "2025-09-10 02:29:53.090396", "step": 3839, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.119904", "step": 3839, "epoch": 2 }, { "type": "loss", "content": 0.010418047197163105, "timestamp": "2025-09-10 02:29:53.143384", "step": 3840, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.173892", "step": 3840, "epoch": 2 }, { "type": "loss", "content": 0.012020133435726166, "timestamp": "2025-09-10 02:29:53.176000", "step": 3841, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:53.205072", "step": 3841, "epoch": 2 }, { "type": "loss", "content": 0.002090350491926074, "timestamp": "2025-09-10 02:29:53.206852", "step": 3842, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.236065", "step": 3842, "epoch": 2 }, { "type": "loss", "content": 0.003925625700503588, "timestamp": "2025-09-10 02:29:53.237926", "step": 3843, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.267326", "step": 3843, "epoch": 2 }, { "type": "loss", "content": 0.016873370856046677, "timestamp": "2025-09-10 02:29:53.290687", "step": 3844, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:53.320058", "step": 3844, "epoch": 2 }, { "type": "loss", "content": 0.021129081025719643, "timestamp": "2025-09-10 02:29:53.321839", "step": 3845, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.350549", "step": 3845, "epoch": 2 }, { "type": "loss", "content": 0.0009218491613864899, "timestamp": "2025-09-10 02:29:53.352638", "step": 3846, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.381637", "step": 3846, "epoch": 2 }, { "type": "loss", "content": 0.030607568100094795, "timestamp": "2025-09-10 02:29:53.383472", "step": 3847, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.413302", "step": 3847, "epoch": 2 }, { "type": "loss", "content": 0.0012057321146130562, "timestamp": "2025-09-10 02:29:53.436599", "step": 3848, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:53.465700", "step": 3848, "epoch": 2 }, { "type": "loss", "content": 0.004216120112687349, "timestamp": "2025-09-10 02:29:53.468969", "step": 3849, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.498087", "step": 3849, "epoch": 2 }, { "type": "loss", "content": 0.008416402153670788, "timestamp": "2025-09-10 02:29:53.501077", "step": 3850, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.531375", "step": 3850, "epoch": 2 }, { "type": "loss", "content": 0.012984177097678185, "timestamp": "2025-09-10 02:29:53.533247", "step": 3851, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.568477", "step": 3851, "epoch": 2 }, { "type": "loss", "content": 0.0030218735337257385, "timestamp": "2025-09-10 02:29:53.591761", "step": 3852, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.621706", "step": 3852, "epoch": 2 }, { "type": "loss", "content": 0.008891667239367962, "timestamp": "2025-09-10 02:29:53.623349", "step": 3853, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.655170", "step": 3853, "epoch": 2 }, { "type": "loss", "content": 0.0016418088926002383, "timestamp": "2025-09-10 02:29:53.657158", "step": 3854, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.694041", "step": 3854, "epoch": 2 }, { "type": "loss", "content": 0.0011525944573804736, "timestamp": "2025-09-10 02:29:53.695959", "step": 3855, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.731313", "step": 3855, "epoch": 2 }, { "type": "loss", "content": 0.0021038041450083256, "timestamp": "2025-09-10 02:29:53.754733", "step": 3856, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.784034", "step": 3856, "epoch": 2 }, { "type": "loss", "content": 0.0032909123692661524, "timestamp": "2025-09-10 02:29:53.785799", "step": 3857, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.814691", "step": 3857, "epoch": 2 }, { "type": "loss", "content": 0.0014812155859544873, "timestamp": "2025-09-10 02:29:53.816838", "step": 3858, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:53.845981", "step": 3858, "epoch": 2 }, { "type": "loss", "content": 0.045353662222623825, "timestamp": "2025-09-10 02:29:53.847683", "step": 3859, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.876484", "step": 3859, "epoch": 2 }, { "type": "loss", "content": 0.0009044137550517917, "timestamp": "2025-09-10 02:29:53.899887", "step": 3860, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.928640", "step": 3860, "epoch": 2 }, { "type": "loss", "content": 0.015513166785240173, "timestamp": "2025-09-10 02:29:53.930751", "step": 3861, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.959644", "step": 3861, "epoch": 2 }, { "type": "loss", "content": 0.036323368549346924, "timestamp": "2025-09-10 02:29:53.961539", "step": 3862, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:53.990198", "step": 3862, "epoch": 2 }, { "type": "loss", "content": 0.012267704121768475, "timestamp": "2025-09-10 02:29:53.991939", "step": 3863, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.021313", "step": 3863, "epoch": 2 }, { "type": "loss", "content": 0.0019978752825409174, "timestamp": "2025-09-10 02:29:54.044806", "step": 3864, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.073966", "step": 3864, "epoch": 2 }, { "type": "loss", "content": 0.004850670695304871, "timestamp": "2025-09-10 02:29:54.075704", "step": 3865, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:54.105090", "step": 3865, "epoch": 2 }, { "type": "loss", "content": 0.01693730801343918, "timestamp": "2025-09-10 02:29:54.107094", "step": 3866, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.137119", "step": 3866, "epoch": 2 }, { "type": "loss", "content": 0.0009086922509595752, "timestamp": "2025-09-10 02:29:54.139024", "step": 3867, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.168113", "step": 3867, "epoch": 2 }, { "type": "loss", "content": 0.0009111051331274211, "timestamp": "2025-09-10 02:29:54.191636", "step": 3868, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.223928", "step": 3868, "epoch": 2 }, { "type": "loss", "content": 0.015068517066538334, "timestamp": "2025-09-10 02:29:54.225992", "step": 3869, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.257797", "step": 3869, "epoch": 2 }, { "type": "loss", "content": 0.005425662267953157, "timestamp": "2025-09-10 02:29:54.259789", "step": 3870, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.291107", "step": 3870, "epoch": 2 }, { "type": "loss", "content": 0.002478309441357851, "timestamp": "2025-09-10 02:29:54.293117", "step": 3871, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.324421", "step": 3871, "epoch": 2 }, { "type": "loss", "content": 0.025403037667274475, "timestamp": "2025-09-10 02:29:54.354661", "step": 3872, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.384872", "step": 3872, "epoch": 2 }, { "type": "loss", "content": 0.0018272175220772624, "timestamp": "2025-09-10 02:29:54.386929", "step": 3873, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.416489", "step": 3873, "epoch": 2 }, { "type": "loss", "content": 0.00047846572124399245, "timestamp": "2025-09-10 02:29:54.420552", "step": 3874, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.450418", "step": 3874, "epoch": 2 }, { "type": "loss", "content": 0.0008692663977853954, "timestamp": "2025-09-10 02:29:54.453460", "step": 3875, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.483631", "step": 3875, "epoch": 2 }, { "type": "loss", "content": 0.0019001452019438148, "timestamp": "2025-09-10 02:29:54.507388", "step": 3876, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:54.540707", "step": 3876, "epoch": 2 }, { "type": "loss", "content": 0.0020460376981645823, "timestamp": "2025-09-10 02:29:54.542603", "step": 3877, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.571885", "step": 3877, "epoch": 2 }, { "type": "loss", "content": 0.056729283183813095, "timestamp": "2025-09-10 02:29:54.573888", "step": 3878, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.603442", "step": 3878, "epoch": 2 }, { "type": "loss", "content": 0.04931720346212387, "timestamp": "2025-09-10 02:29:54.605175", "step": 3879, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.633983", "step": 3879, "epoch": 2 }, { "type": "loss", "content": 0.0004895208985544741, "timestamp": "2025-09-10 02:29:54.657446", "step": 3880, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.686807", "step": 3880, "epoch": 2 }, { "type": "loss", "content": 0.04446374624967575, "timestamp": "2025-09-10 02:29:54.688563", "step": 3881, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:54.716972", "step": 3881, "epoch": 2 }, { "type": "loss", "content": 0.0009974639397114515, "timestamp": "2025-09-10 02:29:54.718955", "step": 3882, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:54.750289", "step": 3882, "epoch": 2 }, { "type": "loss", "content": 0.009860903024673462, "timestamp": "2025-09-10 02:29:54.752519", "step": 3883, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.782085", "step": 3883, "epoch": 2 }, { "type": "loss", "content": 0.0006639487110078335, "timestamp": "2025-09-10 02:29:54.805713", "step": 3884, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.835492", "step": 3884, "epoch": 2 }, { "type": "loss", "content": 0.056612931191921234, "timestamp": "2025-09-10 02:29:54.837386", "step": 3885, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.866246", "step": 3885, "epoch": 2 }, { "type": "loss", "content": 0.007797067053616047, "timestamp": "2025-09-10 02:29:54.868101", "step": 3886, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.910634", "step": 3886, "epoch": 2 }, { "type": "loss", "content": 0.007474190555512905, "timestamp": "2025-09-10 02:29:54.913767", "step": 3887, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.943004", "step": 3887, "epoch": 2 }, { "type": "loss", "content": 0.0011330494889989495, "timestamp": "2025-09-10 02:29:54.966553", "step": 3888, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:54.996465", "step": 3888, "epoch": 2 }, { "type": "loss", "content": 0.0007213219068944454, "timestamp": "2025-09-10 02:29:54.998363", "step": 3889, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.040692", "step": 3889, "epoch": 2 }, { "type": "loss", "content": 0.006773100234568119, "timestamp": "2025-09-10 02:29:55.042873", "step": 3890, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.072634", "step": 3890, "epoch": 2 }, { "type": "loss", "content": 0.0019626482389867306, "timestamp": "2025-09-10 02:29:55.074678", "step": 3891, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:55.103498", "step": 3891, "epoch": 2 }, { "type": "loss", "content": 0.0072317165322601795, "timestamp": "2025-09-10 02:29:55.126908", "step": 3892, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.156107", "step": 3892, "epoch": 2 }, { "type": "loss", "content": 0.02815018780529499, "timestamp": "2025-09-10 02:29:55.158094", "step": 3893, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.187345", "step": 3893, "epoch": 2 }, { "type": "loss", "content": 0.008434193208813667, "timestamp": "2025-09-10 02:29:55.189099", "step": 3894, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.217858", "step": 3894, "epoch": 2 }, { "type": "loss", "content": 0.0047177039086818695, "timestamp": "2025-09-10 02:29:55.219902", "step": 3895, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.248588", "step": 3895, "epoch": 2 }, { "type": "loss", "content": 0.0007047757389955223, "timestamp": "2025-09-10 02:29:55.271894", "step": 3896, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:55.301177", "step": 3896, "epoch": 2 }, { "type": "loss", "content": 0.00018565657956060022, "timestamp": "2025-09-10 02:29:55.303142", "step": 3897, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.332479", "step": 3897, "epoch": 2 }, { "type": "loss", "content": 0.031223705038428307, "timestamp": "2025-09-10 02:29:55.334249", "step": 3898, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.363809", "step": 3898, "epoch": 2 }, { "type": "loss", "content": 0.0027033479418605566, "timestamp": "2025-09-10 02:29:55.365558", "step": 3899, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.394506", "step": 3899, "epoch": 2 }, { "type": "loss", "content": 0.009265147149562836, "timestamp": "2025-09-10 02:29:55.417874", "step": 3900, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.446866", "step": 3900, "epoch": 2 }, { "type": "loss", "content": 0.00031578371999785304, "timestamp": "2025-09-10 02:29:55.448756", "step": 3901, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:55.477613", "step": 3901, "epoch": 2 }, { "type": "loss", "content": 0.05228476598858833, "timestamp": "2025-09-10 02:29:55.479579", "step": 3902, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.508499", "step": 3902, "epoch": 2 }, { "type": "loss", "content": 0.009154691360890865, "timestamp": "2025-09-10 02:29:55.510203", "step": 3903, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.539528", "step": 3903, "epoch": 2 }, { "type": "loss", "content": 0.0027278217021375895, "timestamp": "2025-09-10 02:29:55.563193", "step": 3904, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.593016", "step": 3904, "epoch": 2 }, { "type": "loss", "content": 0.0007913299486972392, "timestamp": "2025-09-10 02:29:55.595202", "step": 3905, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.624244", "step": 3905, "epoch": 2 }, { "type": "loss", "content": 0.013621260412037373, "timestamp": "2025-09-10 02:29:55.626138", "step": 3906, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.654998", "step": 3906, "epoch": 2 }, { "type": "loss", "content": 0.0011792125878855586, "timestamp": "2025-09-10 02:29:55.656886", "step": 3907, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:55.686349", "step": 3907, "epoch": 2 }, { "type": "loss", "content": 0.09010230749845505, "timestamp": "2025-09-10 02:29:55.709764", "step": 3908, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.740355", "step": 3908, "epoch": 2 }, { "type": "loss", "content": 0.03721442073583603, "timestamp": "2025-09-10 02:29:55.742235", "step": 3909, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.771049", "step": 3909, "epoch": 2 }, { "type": "loss", "content": 0.008620529435575008, "timestamp": "2025-09-10 02:29:55.772938", "step": 3910, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.801868", "step": 3910, "epoch": 2 }, { "type": "loss", "content": 0.0030499857384711504, "timestamp": "2025-09-10 02:29:55.803449", "step": 3911, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:55.832522", "step": 3911, "epoch": 2 }, { "type": "loss", "content": 0.0010884355287998915, "timestamp": "2025-09-10 02:29:55.855989", "step": 3912, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.885155", "step": 3912, "epoch": 2 }, { "type": "loss", "content": 0.009860835038125515, "timestamp": "2025-09-10 02:29:55.887125", "step": 3913, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:55.916524", "step": 3913, "epoch": 2 }, { "type": "loss", "content": 0.01391797699034214, "timestamp": "2025-09-10 02:29:55.918207", "step": 3914, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.947498", "step": 3914, "epoch": 2 }, { "type": "loss", "content": 0.0006923922337591648, "timestamp": "2025-09-10 02:29:55.949523", "step": 3915, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:55.979565", "step": 3915, "epoch": 2 }, { "type": "loss", "content": 0.02779540978372097, "timestamp": "2025-09-10 02:29:56.003168", "step": 3916, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.032851", "step": 3916, "epoch": 2 }, { "type": "loss", "content": 0.0006811950006522238, "timestamp": "2025-09-10 02:29:56.034876", "step": 3917, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.063704", "step": 3917, "epoch": 2 }, { "type": "loss", "content": 0.009511835873126984, "timestamp": "2025-09-10 02:29:56.065889", "step": 3918, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.095395", "step": 3918, "epoch": 2 }, { "type": "loss", "content": 0.0006021776353009045, "timestamp": "2025-09-10 02:29:56.097857", "step": 3919, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:56.127173", "step": 3919, "epoch": 2 }, { "type": "loss", "content": 0.043237872421741486, "timestamp": "2025-09-10 02:29:56.150893", "step": 3920, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.180573", "step": 3920, "epoch": 2 }, { "type": "loss", "content": 0.03712473064661026, "timestamp": "2025-09-10 02:29:56.182550", "step": 3921, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.212141", "step": 3921, "epoch": 2 }, { "type": "loss", "content": 0.09350786358118057, "timestamp": "2025-09-10 02:29:56.214113", "step": 3922, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.243208", "step": 3922, "epoch": 2 }, { "type": "loss", "content": 0.002584946108981967, "timestamp": "2025-09-10 02:29:56.245031", "step": 3923, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:56.274117", "step": 3923, "epoch": 2 }, { "type": "loss", "content": 0.00048091966891661286, "timestamp": "2025-09-10 02:29:56.297775", "step": 3924, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.326627", "step": 3924, "epoch": 2 }, { "type": "loss", "content": 0.02092219889163971, "timestamp": "2025-09-10 02:29:56.328554", "step": 3925, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.357750", "step": 3925, "epoch": 2 }, { "type": "loss", "content": 0.016485700383782387, "timestamp": "2025-09-10 02:29:56.359404", "step": 3926, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.388526", "step": 3926, "epoch": 2 }, { "type": "loss", "content": 0.03749697282910347, "timestamp": "2025-09-10 02:29:56.390567", "step": 3927, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:56.419573", "step": 3927, "epoch": 2 }, { "type": "loss", "content": 0.0031544025987386703, "timestamp": "2025-09-10 02:29:56.443031", "step": 3928, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.472735", "step": 3928, "epoch": 2 }, { "type": "loss", "content": 0.001070962636731565, "timestamp": "2025-09-10 02:29:56.474549", "step": 3929, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.504046", "step": 3929, "epoch": 2 }, { "type": "loss", "content": 0.02318684197962284, "timestamp": "2025-09-10 02:29:56.506161", "step": 3930, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.535569", "step": 3930, "epoch": 2 }, { "type": "loss", "content": 0.0021289566066116095, "timestamp": "2025-09-10 02:29:56.537595", "step": 3931, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.566299", "step": 3931, "epoch": 2 }, { "type": "loss", "content": 0.001401600195094943, "timestamp": "2025-09-10 02:29:56.589878", "step": 3932, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.618459", "step": 3932, "epoch": 2 }, { "type": "loss", "content": 0.015513087622821331, "timestamp": "2025-09-10 02:29:56.620540", "step": 3933, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:56.649551", "step": 3933, "epoch": 2 }, { "type": "loss", "content": 0.003620770527049899, "timestamp": "2025-09-10 02:29:56.651542", "step": 3934, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.680508", "step": 3934, "epoch": 2 }, { "type": "loss", "content": 0.009311458095908165, "timestamp": "2025-09-10 02:29:56.682380", "step": 3935, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.711359", "step": 3935, "epoch": 2 }, { "type": "loss", "content": 0.0014422357780858874, "timestamp": "2025-09-10 02:29:56.734709", "step": 3936, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.763344", "step": 3936, "epoch": 2 }, { "type": "loss", "content": 0.0011237767757847905, "timestamp": "2025-09-10 02:29:56.765294", "step": 3937, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.794767", "step": 3937, "epoch": 2 }, { "type": "loss", "content": 0.01559279952198267, "timestamp": "2025-09-10 02:29:56.796479", "step": 3938, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.825995", "step": 3938, "epoch": 2 }, { "type": "loss", "content": 0.00151608616579324, "timestamp": "2025-09-10 02:29:56.827859", "step": 3939, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.856971", "step": 3939, "epoch": 2 }, { "type": "loss", "content": 0.01965700089931488, "timestamp": "2025-09-10 02:29:56.880555", "step": 3940, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.911272", "step": 3940, "epoch": 2 }, { "type": "loss", "content": 0.008940416388213634, "timestamp": "2025-09-10 02:29:56.913290", "step": 3941, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.943191", "step": 3941, "epoch": 2 }, { "type": "loss", "content": 0.0021927556954324245, "timestamp": "2025-09-10 02:29:56.945014", "step": 3942, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:56.974085", "step": 3942, "epoch": 2 }, { "type": "loss", "content": 0.004246424417942762, "timestamp": "2025-09-10 02:29:56.975850", "step": 3943, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.004702", "step": 3943, "epoch": 2 }, { "type": "loss", "content": 0.002561484929174185, "timestamp": "2025-09-10 02:29:57.028352", "step": 3944, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.056951", "step": 3944, "epoch": 2 }, { "type": "loss", "content": 0.015105332247912884, "timestamp": "2025-09-10 02:29:57.058809", "step": 3945, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.088690", "step": 3945, "epoch": 2 }, { "type": "loss", "content": 0.03435353934764862, "timestamp": "2025-09-10 02:29:57.090499", "step": 3946, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:57.119919", "step": 3946, "epoch": 2 }, { "type": "loss", "content": 0.025656159967184067, "timestamp": "2025-09-10 02:29:57.121962", "step": 3947, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.151240", "step": 3947, "epoch": 2 }, { "type": "loss", "content": 0.020980658009648323, "timestamp": "2025-09-10 02:29:57.174771", "step": 3948, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.204333", "step": 3948, "epoch": 2 }, { "type": "loss", "content": 0.06358266621828079, "timestamp": "2025-09-10 02:29:57.206328", "step": 3949, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.235447", "step": 3949, "epoch": 2 }, { "type": "loss", "content": 0.03147200122475624, "timestamp": "2025-09-10 02:29:57.237403", "step": 3950, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.267036", "step": 3950, "epoch": 2 }, { "type": "loss", "content": 0.010876178741455078, "timestamp": "2025-09-10 02:29:57.268894", "step": 3951, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:57.297293", "step": 3951, "epoch": 2 }, { "type": "loss", "content": 0.0009858089033514261, "timestamp": "2025-09-10 02:29:57.320646", "step": 3952, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:29:59.253867", "step": 3952, "epoch": 2 }, { "type": "pplx", "content": 2615495.2424767786, "timestamp": "2025-09-10 02:29:59.255630", "step": 3952, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.284200", "step": 3952, "epoch": 2 }, { "type": "loss", "content": 0.0033627322409301996, "timestamp": "2025-09-10 02:29:59.285977", "step": 3953, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.319115", "step": 3953, "epoch": 2 }, { "type": "loss", "content": 0.0013119837967678905, "timestamp": "2025-09-10 02:29:59.320955", "step": 3954, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.349942", "step": 3954, "epoch": 2 }, { "type": "loss", "content": 0.014528962783515453, "timestamp": "2025-09-10 02:29:59.351674", "step": 3955, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.380871", "step": 3955, "epoch": 2 }, { "type": "loss", "content": 0.017321297898888588, "timestamp": "2025-09-10 02:29:59.404664", "step": 3956, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.433420", "step": 3956, "epoch": 2 }, { "type": "loss", "content": 0.01338347140699625, "timestamp": "2025-09-10 02:29:59.435589", "step": 3957, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.465072", "step": 3957, "epoch": 2 }, { "type": "loss", "content": 0.003248179331421852, "timestamp": "2025-09-10 02:29:59.466793", "step": 3958, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:59.495807", "step": 3958, "epoch": 2 }, { "type": "loss", "content": 0.002999834483489394, "timestamp": "2025-09-10 02:29:59.497787", "step": 3959, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.526592", "step": 3959, "epoch": 2 }, { "type": "loss", "content": 0.012748440727591515, "timestamp": "2025-09-10 02:29:59.549906", "step": 3960, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.579143", "step": 3960, "epoch": 2 }, { "type": "loss", "content": 0.014087346382439137, "timestamp": "2025-09-10 02:29:59.580887", "step": 3961, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.610044", "step": 3961, "epoch": 2 }, { "type": "loss", "content": 0.052782002836465836, "timestamp": "2025-09-10 02:29:59.612039", "step": 3962, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.640701", "step": 3962, "epoch": 2 }, { "type": "loss", "content": 0.04501364752650261, "timestamp": "2025-09-10 02:29:59.642455", "step": 3963, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.670906", "step": 3963, "epoch": 2 }, { "type": "loss", "content": 0.028359297662973404, "timestamp": "2025-09-10 02:29:59.694504", "step": 3964, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:29:59.727179", "step": 3964, "epoch": 2 }, { "type": "loss", "content": 0.0044990344904363155, "timestamp": "2025-09-10 02:29:59.729155", "step": 3965, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.758657", "step": 3965, "epoch": 2 }, { "type": "loss", "content": 0.0017106970772147179, "timestamp": "2025-09-10 02:29:59.760672", "step": 3966, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.789864", "step": 3966, "epoch": 2 }, { "type": "loss", "content": 0.026996374130249023, "timestamp": "2025-09-10 02:29:59.792058", "step": 3967, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:29:59.820716", "step": 3967, "epoch": 2 }, { "type": "loss", "content": 0.01697719469666481, "timestamp": "2025-09-10 02:29:59.844102", "step": 3968, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.873516", "step": 3968, "epoch": 2 }, { "type": "loss", "content": 0.0038953584153205156, "timestamp": "2025-09-10 02:29:59.875597", "step": 3969, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.904591", "step": 3969, "epoch": 2 }, { "type": "loss", "content": 0.020404910668730736, "timestamp": "2025-09-10 02:29:59.906425", "step": 3970, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.935596", "step": 3970, "epoch": 2 }, { "type": "loss", "content": 0.002622651169076562, "timestamp": "2025-09-10 02:29:59.937494", "step": 3971, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:29:59.966490", "step": 3971, "epoch": 2 }, { "type": "loss", "content": 0.0038400557823479176, "timestamp": "2025-09-10 02:29:59.990075", "step": 3972, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.019724", "step": 3972, "epoch": 2 }, { "type": "loss", "content": 0.005181272979825735, "timestamp": "2025-09-10 02:30:00.021447", "step": 3973, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.051342", "step": 3973, "epoch": 2 }, { "type": "loss", "content": 0.008785213343799114, "timestamp": "2025-09-10 02:30:00.053332", "step": 3974, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.082694", "step": 3974, "epoch": 2 }, { "type": "loss", "content": 0.00593586964532733, "timestamp": "2025-09-10 02:30:00.084519", "step": 3975, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:00.114700", "step": 3975, "epoch": 2 }, { "type": "loss", "content": 0.014440938830375671, "timestamp": "2025-09-10 02:30:00.138431", "step": 3976, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:00.167770", "step": 3976, "epoch": 2 }, { "type": "loss", "content": 0.02001953311264515, "timestamp": "2025-09-10 02:30:00.169656", "step": 3977, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.200231", "step": 3977, "epoch": 2 }, { "type": "loss", "content": 0.002950613619759679, "timestamp": "2025-09-10 02:30:00.202410", "step": 3978, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.231642", "step": 3978, "epoch": 2 }, { "type": "loss", "content": 0.0004510123108047992, "timestamp": "2025-09-10 02:30:00.233670", "step": 3979, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.262401", "step": 3979, "epoch": 2 }, { "type": "loss", "content": 0.018364567309617996, "timestamp": "2025-09-10 02:30:00.285923", "step": 3980, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.316267", "step": 3980, "epoch": 2 }, { "type": "loss", "content": 0.0015379964606836438, "timestamp": "2025-09-10 02:30:00.318459", "step": 3981, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.348082", "step": 3981, "epoch": 2 }, { "type": "loss", "content": 0.006171368528157473, "timestamp": "2025-09-10 02:30:00.350030", "step": 3982, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.378518", "step": 3982, "epoch": 2 }, { "type": "loss", "content": 0.0326349250972271, "timestamp": "2025-09-10 02:30:00.380537", "step": 3983, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.409402", "step": 3983, "epoch": 2 }, { "type": "loss", "content": 0.001550512621179223, "timestamp": "2025-09-10 02:30:00.432920", "step": 3984, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.466017", "step": 3984, "epoch": 2 }, { "type": "loss", "content": 0.014622099697589874, "timestamp": "2025-09-10 02:30:00.468062", "step": 3985, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:00.496905", "step": 3985, "epoch": 2 }, { "type": "loss", "content": 0.011592509225010872, "timestamp": "2025-09-10 02:30:00.499205", "step": 3986, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.528933", "step": 3986, "epoch": 2 }, { "type": "loss", "content": 0.0030130650848150253, "timestamp": "2025-09-10 02:30:00.530600", "step": 3987, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.559724", "step": 3987, "epoch": 2 }, { "type": "loss", "content": 0.002759770257398486, "timestamp": "2025-09-10 02:30:00.583184", "step": 3988, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.612700", "step": 3988, "epoch": 2 }, { "type": "loss", "content": 0.001730964402668178, "timestamp": "2025-09-10 02:30:00.614556", "step": 3989, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:00.643867", "step": 3989, "epoch": 2 }, { "type": "loss", "content": 0.002076812321320176, "timestamp": "2025-09-10 02:30:00.645839", "step": 3990, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.674781", "step": 3990, "epoch": 2 }, { "type": "loss", "content": 0.006203613709658384, "timestamp": "2025-09-10 02:30:00.677156", "step": 3991, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.706529", "step": 3991, "epoch": 2 }, { "type": "loss", "content": 0.014986904338002205, "timestamp": "2025-09-10 02:30:00.729828", "step": 3992, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:00.759586", "step": 3992, "epoch": 2 }, { "type": "loss", "content": 0.002898239064961672, "timestamp": "2025-09-10 02:30:00.762937", "step": 3993, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.793584", "step": 3993, "epoch": 2 }, { "type": "loss", "content": 0.021653810515999794, "timestamp": "2025-09-10 02:30:00.795930", "step": 3994, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.824585", "step": 3994, "epoch": 2 }, { "type": "loss", "content": 0.02008243091404438, "timestamp": "2025-09-10 02:30:00.826616", "step": 3995, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.855718", "step": 3995, "epoch": 2 }, { "type": "loss", "content": 0.0016952345613390207, "timestamp": "2025-09-10 02:30:00.879010", "step": 3996, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:00.908703", "step": 3996, "epoch": 2 }, { "type": "loss", "content": 0.0011763189686462283, "timestamp": "2025-09-10 02:30:00.910599", "step": 3997, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.939259", "step": 3997, "epoch": 2 }, { "type": "loss", "content": 0.006132784299552441, "timestamp": "2025-09-10 02:30:00.941170", "step": 3998, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:00.970037", "step": 3998, "epoch": 2 }, { "type": "loss", "content": 0.030746951699256897, "timestamp": "2025-09-10 02:30:00.971771", "step": 3999, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:30:01.001173", "step": 3999, "epoch": 2 }, { "type": "loss", "content": 0.004159748088568449, "timestamp": "2025-09-10 02:30:01.024858", "step": 4000, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 4000", "timestamp": "2025-09-10 02:30:05.389163", "step": 4000, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:05.423101", "step": 4000, "epoch": 2 }, { "type": "loss", "content": 0.037195395678281784, "timestamp": "2025-09-10 02:30:05.425038", "step": 4001, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.454720", "step": 4001, "epoch": 2 }, { "type": "loss", "content": 0.03162485733628273, "timestamp": "2025-09-10 02:30:05.456588", "step": 4002, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.485405", "step": 4002, "epoch": 2 }, { "type": "loss", "content": 0.0013071894645690918, "timestamp": "2025-09-10 02:30:05.487812", "step": 4003, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.517443", "step": 4003, "epoch": 2 }, { "type": "loss", "content": 0.0009963825577870011, "timestamp": "2025-09-10 02:30:05.540927", "step": 4004, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.570267", "step": 4004, "epoch": 2 }, { "type": "loss", "content": 0.011100312694907188, "timestamp": "2025-09-10 02:30:05.572225", "step": 4005, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.601282", "step": 4005, "epoch": 2 }, { "type": "loss", "content": 0.0045918854884803295, "timestamp": "2025-09-10 02:30:05.603669", "step": 4006, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.633206", "step": 4006, "epoch": 2 }, { "type": "loss", "content": 0.019447358325123787, "timestamp": "2025-09-10 02:30:05.635263", "step": 4007, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.664489", "step": 4007, "epoch": 2 }, { "type": "loss", "content": 0.0015611470444127917, "timestamp": "2025-09-10 02:30:05.688488", "step": 4008, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:05.718131", "step": 4008, "epoch": 2 }, { "type": "loss", "content": 0.046501822769641876, "timestamp": "2025-09-10 02:30:05.720993", "step": 4009, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:05.750228", "step": 4009, "epoch": 2 }, { "type": "loss", "content": 0.023632794618606567, "timestamp": "2025-09-10 02:30:05.752253", "step": 4010, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.781786", "step": 4010, "epoch": 2 }, { "type": "loss", "content": 0.03684673458337784, "timestamp": "2025-09-10 02:30:05.783548", "step": 4011, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.812482", "step": 4011, "epoch": 2 }, { "type": "loss", "content": 0.02587457373738289, "timestamp": "2025-09-10 02:30:05.836129", "step": 4012, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.865936", "step": 4012, "epoch": 2 }, { "type": "loss", "content": 0.06219131126999855, "timestamp": "2025-09-10 02:30:05.867998", "step": 4013, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:05.897526", "step": 4013, "epoch": 2 }, { "type": "loss", "content": 0.0007624986465089023, "timestamp": "2025-09-10 02:30:05.899678", "step": 4014, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:05.928773", "step": 4014, "epoch": 2 }, { "type": "loss", "content": 0.021604230627417564, "timestamp": "2025-09-10 02:30:05.931338", "step": 4015, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:05.961523", "step": 4015, "epoch": 2 }, { "type": "loss", "content": 0.034620076417922974, "timestamp": "2025-09-10 02:30:05.985695", "step": 4016, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.015508", "step": 4016, "epoch": 2 }, { "type": "loss", "content": 0.004367115441709757, "timestamp": "2025-09-10 02:30:06.017418", "step": 4017, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.046713", "step": 4017, "epoch": 2 }, { "type": "loss", "content": 0.05816395953297615, "timestamp": "2025-09-10 02:30:06.048794", "step": 4018, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.077931", "step": 4018, "epoch": 2 }, { "type": "loss", "content": 0.0014941880945116282, "timestamp": "2025-09-10 02:30:06.079899", "step": 4019, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.108947", "step": 4019, "epoch": 2 }, { "type": "loss", "content": 0.021339034661650658, "timestamp": "2025-09-10 02:30:06.132542", "step": 4020, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.162180", "step": 4020, "epoch": 2 }, { "type": "loss", "content": 0.016611723229289055, "timestamp": "2025-09-10 02:30:06.164273", "step": 4021, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:06.194192", "step": 4021, "epoch": 2 }, { "type": "loss", "content": 0.04329407960176468, "timestamp": "2025-09-10 02:30:06.196334", "step": 4022, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.225919", "step": 4022, "epoch": 2 }, { "type": "loss", "content": 0.0354154147207737, "timestamp": "2025-09-10 02:30:06.227832", "step": 4023, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:06.257145", "step": 4023, "epoch": 2 }, { "type": "loss", "content": 0.006620544008910656, "timestamp": "2025-09-10 02:30:06.280719", "step": 4024, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.310229", "step": 4024, "epoch": 2 }, { "type": "loss", "content": 0.011618427000939846, "timestamp": "2025-09-10 02:30:06.312331", "step": 4025, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.341609", "step": 4025, "epoch": 2 }, { "type": "loss", "content": 0.02478812262415886, "timestamp": "2025-09-10 02:30:06.343714", "step": 4026, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.372696", "step": 4026, "epoch": 2 }, { "type": "loss", "content": 0.029102688655257225, "timestamp": "2025-09-10 02:30:06.374686", "step": 4027, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.404049", "step": 4027, "epoch": 2 }, { "type": "loss", "content": 0.01803060993552208, "timestamp": "2025-09-10 02:30:06.427656", "step": 4028, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.457049", "step": 4028, "epoch": 2 }, { "type": "loss", "content": 0.019281534478068352, "timestamp": "2025-09-10 02:30:06.458956", "step": 4029, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.488395", "step": 4029, "epoch": 2 }, { "type": "loss", "content": 0.026226535439491272, "timestamp": "2025-09-10 02:30:06.490984", "step": 4030, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:06.520182", "step": 4030, "epoch": 2 }, { "type": "loss", "content": 0.04052482917904854, "timestamp": "2025-09-10 02:30:06.522024", "step": 4031, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.551280", "step": 4031, "epoch": 2 }, { "type": "loss", "content": 0.013854103162884712, "timestamp": "2025-09-10 02:30:06.575011", "step": 4032, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.604546", "step": 4032, "epoch": 2 }, { "type": "loss", "content": 0.00856628268957138, "timestamp": "2025-09-10 02:30:06.606638", "step": 4033, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.635760", "step": 4033, "epoch": 2 }, { "type": "loss", "content": 0.002442223485559225, "timestamp": "2025-09-10 02:30:06.637559", "step": 4034, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:06.666604", "step": 4034, "epoch": 2 }, { "type": "loss", "content": 0.014602691866457462, "timestamp": "2025-09-10 02:30:06.668550", "step": 4035, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.697829", "step": 4035, "epoch": 2 }, { "type": "loss", "content": 0.0062775383703410625, "timestamp": "2025-09-10 02:30:06.721315", "step": 4036, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:06.751115", "step": 4036, "epoch": 2 }, { "type": "loss", "content": 0.005031880922615528, "timestamp": "2025-09-10 02:30:06.752972", "step": 4037, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.781699", "step": 4037, "epoch": 2 }, { "type": "loss", "content": 0.025143790990114212, "timestamp": "2025-09-10 02:30:06.783719", "step": 4038, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.812823", "step": 4038, "epoch": 2 }, { "type": "loss", "content": 0.0027612613048404455, "timestamp": "2025-09-10 02:30:06.814844", "step": 4039, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.843592", "step": 4039, "epoch": 2 }, { "type": "loss", "content": 0.0623394213616848, "timestamp": "2025-09-10 02:30:06.867227", "step": 4040, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.896099", "step": 4040, "epoch": 2 }, { "type": "loss", "content": 0.019657397642731667, "timestamp": "2025-09-10 02:30:06.898140", "step": 4041, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.927452", "step": 4041, "epoch": 2 }, { "type": "loss", "content": 0.0016895385924726725, "timestamp": "2025-09-10 02:30:06.929561", "step": 4042, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.958634", "step": 4042, "epoch": 2 }, { "type": "loss", "content": 0.008014382794499397, "timestamp": "2025-09-10 02:30:06.960814", "step": 4043, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:06.990288", "step": 4043, "epoch": 2 }, { "type": "loss", "content": 0.001446551294066012, "timestamp": "2025-09-10 02:30:07.014198", "step": 4044, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.044378", "step": 4044, "epoch": 2 }, { "type": "loss", "content": 0.002983215032145381, "timestamp": "2025-09-10 02:30:07.047649", "step": 4045, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:07.077646", "step": 4045, "epoch": 2 }, { "type": "loss", "content": 0.005527123808860779, "timestamp": "2025-09-10 02:30:07.079781", "step": 4046, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.109245", "step": 4046, "epoch": 2 }, { "type": "loss", "content": 0.027898045256733894, "timestamp": "2025-09-10 02:30:07.111361", "step": 4047, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:07.141699", "step": 4047, "epoch": 2 }, { "type": "loss", "content": 0.04734370484948158, "timestamp": "2025-09-10 02:30:07.165618", "step": 4048, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.195633", "step": 4048, "epoch": 2 }, { "type": "loss", "content": 0.001743005239404738, "timestamp": "2025-09-10 02:30:07.197786", "step": 4049, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.227182", "step": 4049, "epoch": 2 }, { "type": "loss", "content": 0.028737738728523254, "timestamp": "2025-09-10 02:30:07.228938", "step": 4050, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.258128", "step": 4050, "epoch": 2 }, { "type": "loss", "content": 0.00825404841452837, "timestamp": "2025-09-10 02:30:07.259833", "step": 4051, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.289425", "step": 4051, "epoch": 2 }, { "type": "loss", "content": 0.018694309517741203, "timestamp": "2025-09-10 02:30:07.312986", "step": 4052, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.342138", "step": 4052, "epoch": 2 }, { "type": "loss", "content": 0.016908397898077965, "timestamp": "2025-09-10 02:30:07.344129", "step": 4053, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.372802", "step": 4053, "epoch": 2 }, { "type": "loss", "content": 0.04728404060006142, "timestamp": "2025-09-10 02:30:07.374655", "step": 4054, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.403481", "step": 4054, "epoch": 2 }, { "type": "loss", "content": 0.006686265114694834, "timestamp": "2025-09-10 02:30:07.405540", "step": 4055, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.434772", "step": 4055, "epoch": 2 }, { "type": "loss", "content": 0.0058333114720880985, "timestamp": "2025-09-10 02:30:07.458095", "step": 4056, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.488058", "step": 4056, "epoch": 2 }, { "type": "loss", "content": 0.0021211898420006037, "timestamp": "2025-09-10 02:30:07.490031", "step": 4057, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.519171", "step": 4057, "epoch": 2 }, { "type": "loss", "content": 0.008445287123322487, "timestamp": "2025-09-10 02:30:07.521344", "step": 4058, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:07.551285", "step": 4058, "epoch": 2 }, { "type": "loss", "content": 0.020289387553930283, "timestamp": "2025-09-10 02:30:07.553373", "step": 4059, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.583484", "step": 4059, "epoch": 2 }, { "type": "loss", "content": 0.03862438723444939, "timestamp": "2025-09-10 02:30:07.607228", "step": 4060, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.637647", "step": 4060, "epoch": 2 }, { "type": "loss", "content": 0.002592772478237748, "timestamp": "2025-09-10 02:30:07.639569", "step": 4061, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.669307", "step": 4061, "epoch": 2 }, { "type": "loss", "content": 0.011541393585503101, "timestamp": "2025-09-10 02:30:07.671123", "step": 4062, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.700850", "step": 4062, "epoch": 2 }, { "type": "loss", "content": 0.03516516089439392, "timestamp": "2025-09-10 02:30:07.702943", "step": 4063, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:07.733026", "step": 4063, "epoch": 2 }, { "type": "loss", "content": 0.035814959555864334, "timestamp": "2025-09-10 02:30:07.756584", "step": 4064, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.786384", "step": 4064, "epoch": 2 }, { "type": "loss", "content": 0.0230566393584013, "timestamp": "2025-09-10 02:30:07.788248", "step": 4065, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.817653", "step": 4065, "epoch": 2 }, { "type": "loss", "content": 0.0007291626534424722, "timestamp": "2025-09-10 02:30:07.819866", "step": 4066, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.849222", "step": 4066, "epoch": 2 }, { "type": "loss", "content": 0.002776853274554014, "timestamp": "2025-09-10 02:30:07.851347", "step": 4067, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.880501", "step": 4067, "epoch": 2 }, { "type": "loss", "content": 0.02804150991141796, "timestamp": "2025-09-10 02:30:07.904201", "step": 4068, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.933547", "step": 4068, "epoch": 2 }, { "type": "loss", "content": 0.0031647146679461002, "timestamp": "2025-09-10 02:30:07.935663", "step": 4069, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:07.964562", "step": 4069, "epoch": 2 }, { "type": "loss", "content": 0.00502938125282526, "timestamp": "2025-09-10 02:30:07.966579", "step": 4070, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:07.996083", "step": 4070, "epoch": 2 }, { "type": "loss", "content": 0.00889466144144535, "timestamp": "2025-09-10 02:30:07.997927", "step": 4071, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.026726", "step": 4071, "epoch": 2 }, { "type": "loss", "content": 0.006143052130937576, "timestamp": "2025-09-10 02:30:08.050391", "step": 4072, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.079703", "step": 4072, "epoch": 2 }, { "type": "loss", "content": 0.01753229834139347, "timestamp": "2025-09-10 02:30:08.081521", "step": 4073, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.111217", "step": 4073, "epoch": 2 }, { "type": "loss", "content": 0.01086281705647707, "timestamp": "2025-09-10 02:30:08.114070", "step": 4074, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.143042", "step": 4074, "epoch": 2 }, { "type": "loss", "content": 0.006167584098875523, "timestamp": "2025-09-10 02:30:08.145016", "step": 4075, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.174544", "step": 4075, "epoch": 2 }, { "type": "loss", "content": 0.00639066006988287, "timestamp": "2025-09-10 02:30:08.197774", "step": 4076, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.227485", "step": 4076, "epoch": 2 }, { "type": "loss", "content": 0.007022134959697723, "timestamp": "2025-09-10 02:30:08.231298", "step": 4077, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.263727", "step": 4077, "epoch": 2 }, { "type": "loss", "content": 0.01550102699548006, "timestamp": "2025-09-10 02:30:08.266372", "step": 4078, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.304523", "step": 4078, "epoch": 2 }, { "type": "loss", "content": 0.02558515965938568, "timestamp": "2025-09-10 02:30:08.312748", "step": 4079, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.347277", "step": 4079, "epoch": 2 }, { "type": "loss", "content": 0.018323184922337532, "timestamp": "2025-09-10 02:30:08.378126", "step": 4080, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.420699", "step": 4080, "epoch": 2 }, { "type": "loss", "content": 0.005207470618188381, "timestamp": "2025-09-10 02:30:08.428838", "step": 4081, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:08.464517", "step": 4081, "epoch": 2 }, { "type": "loss", "content": 0.003370558610185981, "timestamp": "2025-09-10 02:30:08.470802", "step": 4082, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.508999", "step": 4082, "epoch": 2 }, { "type": "loss", "content": 0.015834081918001175, "timestamp": "2025-09-10 02:30:08.512237", "step": 4083, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.548932", "step": 4083, "epoch": 2 }, { "type": "loss", "content": 0.010369435884058475, "timestamp": "2025-09-10 02:30:08.581100", "step": 4084, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.617985", "step": 4084, "epoch": 2 }, { "type": "loss", "content": 0.004316999111324549, "timestamp": "2025-09-10 02:30:08.623245", "step": 4085, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.665421", "step": 4085, "epoch": 2 }, { "type": "loss", "content": 0.015852492302656174, "timestamp": "2025-09-10 02:30:08.669507", "step": 4086, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.703154", "step": 4086, "epoch": 2 }, { "type": "loss", "content": 0.02087516523897648, "timestamp": "2025-09-10 02:30:08.711040", "step": 4087, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.750530", "step": 4087, "epoch": 2 }, { "type": "loss", "content": 0.0023121817503124475, "timestamp": "2025-09-10 02:30:08.783557", "step": 4088, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.829775", "step": 4088, "epoch": 2 }, { "type": "loss", "content": 0.057893071323633194, "timestamp": "2025-09-10 02:30:08.833852", "step": 4089, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:08.872849", "step": 4089, "epoch": 2 }, { "type": "loss", "content": 0.004587746690958738, "timestamp": "2025-09-10 02:30:08.879871", "step": 4090, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.920435", "step": 4090, "epoch": 2 }, { "type": "loss", "content": 0.007176821120083332, "timestamp": "2025-09-10 02:30:08.924410", "step": 4091, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:08.959251", "step": 4091, "epoch": 2 }, { "type": "loss", "content": 0.0320601612329483, "timestamp": "2025-09-10 02:30:08.984886", "step": 4092, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.020574", "step": 4092, "epoch": 2 }, { "type": "loss", "content": 0.024275433272123337, "timestamp": "2025-09-10 02:30:09.024886", "step": 4093, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.071153", "step": 4093, "epoch": 2 }, { "type": "loss", "content": 0.004208702128380537, "timestamp": "2025-09-10 02:30:09.078754", "step": 4094, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:09.111703", "step": 4094, "epoch": 2 }, { "type": "loss", "content": 0.004779213573783636, "timestamp": "2025-09-10 02:30:09.119917", "step": 4095, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.165195", "step": 4095, "epoch": 2 }, { "type": "loss", "content": 0.01595015451312065, "timestamp": "2025-09-10 02:30:09.191945", "step": 4096, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:09.221332", "step": 4096, "epoch": 2 }, { "type": "loss", "content": 0.012580021284520626, "timestamp": "2025-09-10 02:30:09.223564", "step": 4097, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.253415", "step": 4097, "epoch": 2 }, { "type": "loss", "content": 0.0033239300828427076, "timestamp": "2025-09-10 02:30:09.255822", "step": 4098, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.284782", "step": 4098, "epoch": 2 }, { "type": "loss", "content": 0.010019478388130665, "timestamp": "2025-09-10 02:30:09.286948", "step": 4099, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.316625", "step": 4099, "epoch": 2 }, { "type": "loss", "content": 0.010247709229588509, "timestamp": "2025-09-10 02:30:09.340804", "step": 4100, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.373804", "step": 4100, "epoch": 2 }, { "type": "loss", "content": 0.0005138172418810427, "timestamp": "2025-09-10 02:30:09.375810", "step": 4101, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:09.404939", "step": 4101, "epoch": 2 }, { "type": "loss", "content": 0.01599857024848461, "timestamp": "2025-09-10 02:30:09.406896", "step": 4102, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:09.436254", "step": 4102, "epoch": 2 }, { "type": "loss", "content": 0.00803226139396429, "timestamp": "2025-09-10 02:30:09.438182", "step": 4103, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:09.467500", "step": 4103, "epoch": 2 }, { "type": "loss", "content": 0.009947722777724266, "timestamp": "2025-09-10 02:30:09.490951", "step": 4104, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:30:11.387547", "step": 4104, "epoch": 2 }, { "type": "pplx", "content": 2449966.308638878, "timestamp": "2025-09-10 02:30:11.389688", "step": 4104, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:11.417968", "step": 4104, "epoch": 2 }, { "type": "loss", "content": 0.00043871457455679774, "timestamp": "2025-09-10 02:30:11.419785", "step": 4105, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.448864", "step": 4105, "epoch": 2 }, { "type": "loss", "content": 0.010353198274970055, "timestamp": "2025-09-10 02:30:11.450934", "step": 4106, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.480163", "step": 4106, "epoch": 2 }, { "type": "loss", "content": 0.0013044985244050622, "timestamp": "2025-09-10 02:30:11.481879", "step": 4107, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.510268", "step": 4107, "epoch": 2 }, { "type": "loss", "content": 0.021372154355049133, "timestamp": "2025-09-10 02:30:11.534076", "step": 4108, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:11.563293", "step": 4108, "epoch": 2 }, { "type": "loss", "content": 0.024162491783499718, "timestamp": "2025-09-10 02:30:11.565368", "step": 4109, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.594834", "step": 4109, "epoch": 2 }, { "type": "loss", "content": 0.008508929051458836, "timestamp": "2025-09-10 02:30:11.597061", "step": 4110, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:11.626197", "step": 4110, "epoch": 2 }, { "type": "loss", "content": 0.0076085105538368225, "timestamp": "2025-09-10 02:30:11.628426", "step": 4111, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.657190", "step": 4111, "epoch": 2 }, { "type": "loss", "content": 0.006037302315235138, "timestamp": "2025-09-10 02:30:11.681072", "step": 4112, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.710872", "step": 4112, "epoch": 2 }, { "type": "loss", "content": 0.001271507004275918, "timestamp": "2025-09-10 02:30:11.713047", "step": 4113, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.743044", "step": 4113, "epoch": 2 }, { "type": "loss", "content": 0.003994493279606104, "timestamp": "2025-09-10 02:30:11.747138", "step": 4114, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.777037", "step": 4114, "epoch": 2 }, { "type": "loss", "content": 0.03419547528028488, "timestamp": "2025-09-10 02:30:11.784158", "step": 4115, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:11.818507", "step": 4115, "epoch": 2 }, { "type": "loss", "content": 0.012027328833937645, "timestamp": "2025-09-10 02:30:11.853991", "step": 4116, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.890264", "step": 4116, "epoch": 2 }, { "type": "loss", "content": 0.024934466928243637, "timestamp": "2025-09-10 02:30:11.892812", "step": 4117, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:11.921809", "step": 4117, "epoch": 2 }, { "type": "loss", "content": 0.0010996349155902863, "timestamp": "2025-09-10 02:30:11.923882", "step": 4118, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.952927", "step": 4118, "epoch": 2 }, { "type": "loss", "content": 0.0031524065416306257, "timestamp": "2025-09-10 02:30:11.957780", "step": 4119, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:11.986421", "step": 4119, "epoch": 2 }, { "type": "loss", "content": 0.014573139138519764, "timestamp": "2025-09-10 02:30:12.010069", "step": 4120, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.038988", "step": 4120, "epoch": 2 }, { "type": "loss", "content": 0.008100205101072788, "timestamp": "2025-09-10 02:30:12.041085", "step": 4121, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.070339", "step": 4121, "epoch": 2 }, { "type": "loss", "content": 0.0010889542754739523, "timestamp": "2025-09-10 02:30:12.072207", "step": 4122, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.104805", "step": 4122, "epoch": 2 }, { "type": "loss", "content": 0.001875544898211956, "timestamp": "2025-09-10 02:30:12.107124", "step": 4123, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.137524", "step": 4123, "epoch": 2 }, { "type": "loss", "content": 0.0022023154888302088, "timestamp": "2025-09-10 02:30:12.164193", "step": 4124, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:12.201423", "step": 4124, "epoch": 2 }, { "type": "loss", "content": 0.004335514735430479, "timestamp": "2025-09-10 02:30:12.204110", "step": 4125, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:12.233130", "step": 4125, "epoch": 2 }, { "type": "loss", "content": 0.023035328835248947, "timestamp": "2025-09-10 02:30:12.234933", "step": 4126, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.264042", "step": 4126, "epoch": 2 }, { "type": "loss", "content": 0.0016248939791694283, "timestamp": "2025-09-10 02:30:12.265987", "step": 4127, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.294883", "step": 4127, "epoch": 2 }, { "type": "loss", "content": 0.06430184841156006, "timestamp": "2025-09-10 02:30:12.319059", "step": 4128, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:12.359017", "step": 4128, "epoch": 2 }, { "type": "loss", "content": 0.002125711413100362, "timestamp": "2025-09-10 02:30:12.361663", "step": 4129, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.395748", "step": 4129, "epoch": 2 }, { "type": "loss", "content": 0.0004497764748521149, "timestamp": "2025-09-10 02:30:12.397657", "step": 4130, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.426527", "step": 4130, "epoch": 2 }, { "type": "loss", "content": 0.010637232102453709, "timestamp": "2025-09-10 02:30:12.428505", "step": 4131, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:12.457293", "step": 4131, "epoch": 2 }, { "type": "loss", "content": 0.0022402710746973753, "timestamp": "2025-09-10 02:30:12.480738", "step": 4132, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:12.510161", "step": 4132, "epoch": 2 }, { "type": "loss", "content": 0.0011779237538576126, "timestamp": "2025-09-10 02:30:12.524879", "step": 4133, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.564302", "step": 4133, "epoch": 2 }, { "type": "loss", "content": 0.01739475503563881, "timestamp": "2025-09-10 02:30:12.566080", "step": 4134, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.597284", "step": 4134, "epoch": 2 }, { "type": "loss", "content": 0.0007681874558329582, "timestamp": "2025-09-10 02:30:12.599319", "step": 4135, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.629535", "step": 4135, "epoch": 2 }, { "type": "loss", "content": 0.01480245403945446, "timestamp": "2025-09-10 02:30:12.653290", "step": 4136, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.682156", "step": 4136, "epoch": 2 }, { "type": "loss", "content": 0.0004298565909266472, "timestamp": "2025-09-10 02:30:12.685402", "step": 4137, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.717830", "step": 4137, "epoch": 2 }, { "type": "loss", "content": 0.0003197753394488245, "timestamp": "2025-09-10 02:30:12.719681", "step": 4138, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.748681", "step": 4138, "epoch": 2 }, { "type": "loss", "content": 0.0436149537563324, "timestamp": "2025-09-10 02:30:12.759945", "step": 4139, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.796877", "step": 4139, "epoch": 2 }, { "type": "loss", "content": 0.001417894964106381, "timestamp": "2025-09-10 02:30:12.820334", "step": 4140, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.850046", "step": 4140, "epoch": 2 }, { "type": "loss", "content": 0.00156626314856112, "timestamp": "2025-09-10 02:30:12.852251", "step": 4141, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.882710", "step": 4141, "epoch": 2 }, { "type": "loss", "content": 0.009736338630318642, "timestamp": "2025-09-10 02:30:12.884690", "step": 4142, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:12.918107", "step": 4142, "epoch": 2 }, { "type": "loss", "content": 0.007936223410069942, "timestamp": "2025-09-10 02:30:12.919915", "step": 4143, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:12.950349", "step": 4143, "epoch": 2 }, { "type": "loss", "content": 0.00017661228775978088, "timestamp": "2025-09-10 02:30:12.975296", "step": 4144, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.004870", "step": 4144, "epoch": 2 }, { "type": "loss", "content": 0.00022023404017090797, "timestamp": "2025-09-10 02:30:13.006891", "step": 4145, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:13.035969", "step": 4145, "epoch": 2 }, { "type": "loss", "content": 0.00031622167443856597, "timestamp": "2025-09-10 02:30:13.037852", "step": 4146, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:13.078489", "step": 4146, "epoch": 2 }, { "type": "loss", "content": 0.03914717584848404, "timestamp": "2025-09-10 02:30:13.080395", "step": 4147, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:13.127488", "step": 4147, "epoch": 2 }, { "type": "loss", "content": 0.002464776625856757, "timestamp": "2025-09-10 02:30:13.154753", "step": 4148, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:13.190110", "step": 4148, "epoch": 2 }, { "type": "loss", "content": 0.0040261102840304375, "timestamp": "2025-09-10 02:30:13.192118", "step": 4149, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.222773", "step": 4149, "epoch": 2 }, { "type": "loss", "content": 0.014154909178614616, "timestamp": "2025-09-10 02:30:13.224848", "step": 4150, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.254083", "step": 4150, "epoch": 2 }, { "type": "loss", "content": 0.0015958038857206702, "timestamp": "2025-09-10 02:30:13.256099", "step": 4151, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.285702", "step": 4151, "epoch": 2 }, { "type": "loss", "content": 0.0005385707481764257, "timestamp": "2025-09-10 02:30:13.309381", "step": 4152, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:13.341823", "step": 4152, "epoch": 2 }, { "type": "loss", "content": 0.0009124533389694989, "timestamp": "2025-09-10 02:30:13.344028", "step": 4153, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.374630", "step": 4153, "epoch": 2 }, { "type": "loss", "content": 0.008126592263579369, "timestamp": "2025-09-10 02:30:13.378552", "step": 4154, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.407949", "step": 4154, "epoch": 2 }, { "type": "loss", "content": 0.023636531084775925, "timestamp": "2025-09-10 02:30:13.409958", "step": 4155, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.439545", "step": 4155, "epoch": 2 }, { "type": "loss", "content": 0.029642626643180847, "timestamp": "2025-09-10 02:30:13.464968", "step": 4156, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.494112", "step": 4156, "epoch": 2 }, { "type": "loss", "content": 0.0045542968437075615, "timestamp": "2025-09-10 02:30:13.495846", "step": 4157, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:13.524626", "step": 4157, "epoch": 2 }, { "type": "loss", "content": 0.019587215036153793, "timestamp": "2025-09-10 02:30:13.526673", "step": 4158, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.561391", "step": 4158, "epoch": 2 }, { "type": "loss", "content": 0.015651974827051163, "timestamp": "2025-09-10 02:30:13.565002", "step": 4159, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.598546", "step": 4159, "epoch": 2 }, { "type": "loss", "content": 0.038601044565439224, "timestamp": "2025-09-10 02:30:13.622042", "step": 4160, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.653561", "step": 4160, "epoch": 2 }, { "type": "loss", "content": 0.001739536295644939, "timestamp": "2025-09-10 02:30:13.655309", "step": 4161, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:13.684721", "step": 4161, "epoch": 2 }, { "type": "loss", "content": 0.006266446318477392, "timestamp": "2025-09-10 02:30:13.686939", "step": 4162, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:13.723160", "step": 4162, "epoch": 2 }, { "type": "loss", "content": 0.015138087794184685, "timestamp": "2025-09-10 02:30:13.725148", "step": 4163, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.754529", "step": 4163, "epoch": 2 }, { "type": "loss", "content": 0.003117672633379698, "timestamp": "2025-09-10 02:30:13.779269", "step": 4164, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.808144", "step": 4164, "epoch": 2 }, { "type": "loss", "content": 0.007980624213814735, "timestamp": "2025-09-10 02:30:13.810139", "step": 4165, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.840569", "step": 4165, "epoch": 2 }, { "type": "loss", "content": 0.0037680792156606913, "timestamp": "2025-09-10 02:30:13.842676", "step": 4166, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:13.874020", "step": 4166, "epoch": 2 }, { "type": "loss", "content": 0.0011201956076547503, "timestamp": "2025-09-10 02:30:13.875914", "step": 4167, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:13.904770", "step": 4167, "epoch": 2 }, { "type": "loss", "content": 0.015456787310540676, "timestamp": "2025-09-10 02:30:13.928479", "step": 4168, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:13.957777", "step": 4168, "epoch": 2 }, { "type": "loss", "content": 0.0004954669857397676, "timestamp": "2025-09-10 02:30:13.959967", "step": 4169, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:13.988843", "step": 4169, "epoch": 2 }, { "type": "loss", "content": 0.0020157850813120604, "timestamp": "2025-09-10 02:30:13.990643", "step": 4170, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.020185", "step": 4170, "epoch": 2 }, { "type": "loss", "content": 0.020275894552469254, "timestamp": "2025-09-10 02:30:14.022498", "step": 4171, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.051651", "step": 4171, "epoch": 2 }, { "type": "loss", "content": 0.005076683592051268, "timestamp": "2025-09-10 02:30:14.074854", "step": 4172, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:14.104260", "step": 4172, "epoch": 2 }, { "type": "loss", "content": 0.013356396928429604, "timestamp": "2025-09-10 02:30:14.106007", "step": 4173, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.134831", "step": 4173, "epoch": 2 }, { "type": "loss", "content": 0.023072008043527603, "timestamp": "2025-09-10 02:30:14.136593", "step": 4174, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.165929", "step": 4174, "epoch": 2 }, { "type": "loss", "content": 0.0010532446904107928, "timestamp": "2025-09-10 02:30:14.167837", "step": 4175, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.197056", "step": 4175, "epoch": 2 }, { "type": "loss", "content": 0.00019865957438014448, "timestamp": "2025-09-10 02:30:14.220909", "step": 4176, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:14.250441", "step": 4176, "epoch": 2 }, { "type": "loss", "content": 0.011448433622717857, "timestamp": "2025-09-10 02:30:14.252483", "step": 4177, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.281681", "step": 4177, "epoch": 2 }, { "type": "loss", "content": 0.001944276737049222, "timestamp": "2025-09-10 02:30:14.283845", "step": 4178, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.312664", "step": 4178, "epoch": 2 }, { "type": "loss", "content": 0.00034151505678892136, "timestamp": "2025-09-10 02:30:14.314810", "step": 4179, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.344090", "step": 4179, "epoch": 2 }, { "type": "loss", "content": 0.00792448129504919, "timestamp": "2025-09-10 02:30:14.367461", "step": 4180, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.397006", "step": 4180, "epoch": 2 }, { "type": "loss", "content": 0.0011224746704101562, "timestamp": "2025-09-10 02:30:14.399191", "step": 4181, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.428694", "step": 4181, "epoch": 2 }, { "type": "loss", "content": 0.0001815783034544438, "timestamp": "2025-09-10 02:30:14.430991", "step": 4182, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.460005", "step": 4182, "epoch": 2 }, { "type": "loss", "content": 0.0010939531493932009, "timestamp": "2025-09-10 02:30:14.462336", "step": 4183, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.492914", "step": 4183, "epoch": 2 }, { "type": "loss", "content": 0.010164668783545494, "timestamp": "2025-09-10 02:30:14.516399", "step": 4184, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.546375", "step": 4184, "epoch": 2 }, { "type": "loss", "content": 0.0004541015368886292, "timestamp": "2025-09-10 02:30:14.548297", "step": 4185, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.577534", "step": 4185, "epoch": 2 }, { "type": "loss", "content": 0.02757410891354084, "timestamp": "2025-09-10 02:30:14.581439", "step": 4186, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.612662", "step": 4186, "epoch": 2 }, { "type": "loss", "content": 0.0029620155692100525, "timestamp": "2025-09-10 02:30:14.614595", "step": 4187, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:14.643698", "step": 4187, "epoch": 2 }, { "type": "loss", "content": 0.03115958347916603, "timestamp": "2025-09-10 02:30:14.667753", "step": 4188, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.696704", "step": 4188, "epoch": 2 }, { "type": "loss", "content": 0.02462959475815296, "timestamp": "2025-09-10 02:30:14.699523", "step": 4189, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.728947", "step": 4189, "epoch": 2 }, { "type": "loss", "content": 0.008932234719395638, "timestamp": "2025-09-10 02:30:14.730988", "step": 4190, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.760011", "step": 4190, "epoch": 2 }, { "type": "loss", "content": 0.02919713594019413, "timestamp": "2025-09-10 02:30:14.762122", "step": 4191, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.791547", "step": 4191, "epoch": 2 }, { "type": "loss", "content": 0.0017417669296264648, "timestamp": "2025-09-10 02:30:14.815099", "step": 4192, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.844924", "step": 4192, "epoch": 2 }, { "type": "loss", "content": 0.003337780013680458, "timestamp": "2025-09-10 02:30:14.847005", "step": 4193, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.875835", "step": 4193, "epoch": 2 }, { "type": "loss", "content": 0.002467533340677619, "timestamp": "2025-09-10 02:30:14.877948", "step": 4194, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:14.907004", "step": 4194, "epoch": 2 }, { "type": "loss", "content": 0.0005795446340925992, "timestamp": "2025-09-10 02:30:14.909049", "step": 4195, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.938518", "step": 4195, "epoch": 2 }, { "type": "loss", "content": 0.0007786169881001115, "timestamp": "2025-09-10 02:30:14.961881", "step": 4196, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:14.990997", "step": 4196, "epoch": 2 }, { "type": "loss", "content": 0.003195093246176839, "timestamp": "2025-09-10 02:30:14.992995", "step": 4197, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.022041", "step": 4197, "epoch": 2 }, { "type": "loss", "content": 0.006263344548642635, "timestamp": "2025-09-10 02:30:15.024331", "step": 4198, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.053425", "step": 4198, "epoch": 2 }, { "type": "loss", "content": 0.0008713462157174945, "timestamp": "2025-09-10 02:30:15.055305", "step": 4199, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.084449", "step": 4199, "epoch": 2 }, { "type": "loss", "content": 0.017755350098013878, "timestamp": "2025-09-10 02:30:15.108050", "step": 4200, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.137303", "step": 4200, "epoch": 2 }, { "type": "loss", "content": 0.005264886189252138, "timestamp": "2025-09-10 02:30:15.139363", "step": 4201, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:15.169105", "step": 4201, "epoch": 2 }, { "type": "loss", "content": 0.00546374311670661, "timestamp": "2025-09-10 02:30:15.171006", "step": 4202, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.199895", "step": 4202, "epoch": 2 }, { "type": "loss", "content": 0.0007607027655467391, "timestamp": "2025-09-10 02:30:15.202063", "step": 4203, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.231114", "step": 4203, "epoch": 2 }, { "type": "loss", "content": 0.005106969736516476, "timestamp": "2025-09-10 02:30:15.254614", "step": 4204, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.284142", "step": 4204, "epoch": 2 }, { "type": "loss", "content": 0.004172091837972403, "timestamp": "2025-09-10 02:30:15.285955", "step": 4205, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.315039", "step": 4205, "epoch": 2 }, { "type": "loss", "content": 0.00981372781097889, "timestamp": "2025-09-10 02:30:15.317125", "step": 4206, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.346326", "step": 4206, "epoch": 2 }, { "type": "loss", "content": 0.004303614143282175, "timestamp": "2025-09-10 02:30:15.348388", "step": 4207, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.377635", "step": 4207, "epoch": 2 }, { "type": "loss", "content": 0.00102977582719177, "timestamp": "2025-09-10 02:30:15.400934", "step": 4208, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.430102", "step": 4208, "epoch": 2 }, { "type": "loss", "content": 0.020137446001172066, "timestamp": "2025-09-10 02:30:15.432394", "step": 4209, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.461473", "step": 4209, "epoch": 2 }, { "type": "loss", "content": 0.010775747708976269, "timestamp": "2025-09-10 02:30:15.463351", "step": 4210, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.491935", "step": 4210, "epoch": 2 }, { "type": "loss", "content": 0.0025170750450342894, "timestamp": "2025-09-10 02:30:15.493988", "step": 4211, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:15.522818", "step": 4211, "epoch": 2 }, { "type": "loss", "content": 0.005516665522009134, "timestamp": "2025-09-10 02:30:15.546569", "step": 4212, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.576204", "step": 4212, "epoch": 2 }, { "type": "loss", "content": 0.000951143098063767, "timestamp": "2025-09-10 02:30:15.578240", "step": 4213, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.607924", "step": 4213, "epoch": 2 }, { "type": "loss", "content": 0.0008516961825080216, "timestamp": "2025-09-10 02:30:15.609790", "step": 4214, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.638609", "step": 4214, "epoch": 2 }, { "type": "loss", "content": 0.04331762716174126, "timestamp": "2025-09-10 02:30:15.640841", "step": 4215, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.670108", "step": 4215, "epoch": 2 }, { "type": "loss", "content": 0.06728193908929825, "timestamp": "2025-09-10 02:30:15.693741", "step": 4216, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.723549", "step": 4216, "epoch": 2 }, { "type": "loss", "content": 0.0026774972211569548, "timestamp": "2025-09-10 02:30:15.725659", "step": 4217, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:15.755397", "step": 4217, "epoch": 2 }, { "type": "loss", "content": 0.003880779491737485, "timestamp": "2025-09-10 02:30:15.757539", "step": 4218, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.786812", "step": 4218, "epoch": 2 }, { "type": "loss", "content": 0.0013745512114837766, "timestamp": "2025-09-10 02:30:15.788962", "step": 4219, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.818124", "step": 4219, "epoch": 2 }, { "type": "loss", "content": 0.0004106538253836334, "timestamp": "2025-09-10 02:30:15.841688", "step": 4220, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:15.871171", "step": 4220, "epoch": 2 }, { "type": "loss", "content": 0.00025766046019271016, "timestamp": "2025-09-10 02:30:15.873415", "step": 4221, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:15.902496", "step": 4221, "epoch": 2 }, { "type": "loss", "content": 0.007885267026722431, "timestamp": "2025-09-10 02:30:15.904517", "step": 4222, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.933918", "step": 4222, "epoch": 2 }, { "type": "loss", "content": 0.00033780946978367865, "timestamp": "2025-09-10 02:30:15.936034", "step": 4223, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:15.964848", "step": 4223, "epoch": 2 }, { "type": "loss", "content": 0.012453519739210606, "timestamp": "2025-09-10 02:30:15.988501", "step": 4224, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.018114", "step": 4224, "epoch": 2 }, { "type": "loss", "content": 0.0007972000166773796, "timestamp": "2025-09-10 02:30:16.020157", "step": 4225, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:16.049596", "step": 4225, "epoch": 2 }, { "type": "loss", "content": 0.0008176401606760919, "timestamp": "2025-09-10 02:30:16.051534", "step": 4226, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.080591", "step": 4226, "epoch": 2 }, { "type": "loss", "content": 0.0032796438317745924, "timestamp": "2025-09-10 02:30:16.082988", "step": 4227, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:16.112213", "step": 4227, "epoch": 2 }, { "type": "loss", "content": 0.002389345783740282, "timestamp": "2025-09-10 02:30:16.135785", "step": 4228, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.165456", "step": 4228, "epoch": 2 }, { "type": "loss", "content": 0.00020894188492093235, "timestamp": "2025-09-10 02:30:16.167503", "step": 4229, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.196451", "step": 4229, "epoch": 2 }, { "type": "loss", "content": 0.009093990549445152, "timestamp": "2025-09-10 02:30:16.198623", "step": 4230, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.228120", "step": 4230, "epoch": 2 }, { "type": "loss", "content": 0.0009998397435992956, "timestamp": "2025-09-10 02:30:16.230550", "step": 4231, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.259696", "step": 4231, "epoch": 2 }, { "type": "loss", "content": 0.0010820318711921573, "timestamp": "2025-09-10 02:30:16.283185", "step": 4232, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.312859", "step": 4232, "epoch": 2 }, { "type": "loss", "content": 0.0028439860325306654, "timestamp": "2025-09-10 02:30:16.315029", "step": 4233, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.344140", "step": 4233, "epoch": 2 }, { "type": "loss", "content": 0.0011132077779620886, "timestamp": "2025-09-10 02:30:16.346217", "step": 4234, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:16.376167", "step": 4234, "epoch": 2 }, { "type": "loss", "content": 0.007233327720314264, "timestamp": "2025-09-10 02:30:16.378345", "step": 4235, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.407160", "step": 4235, "epoch": 2 }, { "type": "loss", "content": 0.026646794751286507, "timestamp": "2025-09-10 02:30:16.431310", "step": 4236, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.460332", "step": 4236, "epoch": 2 }, { "type": "loss", "content": 0.0025783206801861525, "timestamp": "2025-09-10 02:30:16.462424", "step": 4237, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.491788", "step": 4237, "epoch": 2 }, { "type": "loss", "content": 0.0003957781591452658, "timestamp": "2025-09-10 02:30:16.493605", "step": 4238, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.522360", "step": 4238, "epoch": 2 }, { "type": "loss", "content": 0.0038249988574534655, "timestamp": "2025-09-10 02:30:16.524716", "step": 4239, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:16.553918", "step": 4239, "epoch": 2 }, { "type": "loss", "content": 0.020384501665830612, "timestamp": "2025-09-10 02:30:16.577388", "step": 4240, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:16.606976", "step": 4240, "epoch": 2 }, { "type": "loss", "content": 0.0016678055981174111, "timestamp": "2025-09-10 02:30:16.609261", "step": 4241, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.638710", "step": 4241, "epoch": 2 }, { "type": "loss", "content": 0.0007309973007068038, "timestamp": "2025-09-10 02:30:16.640683", "step": 4242, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.669705", "step": 4242, "epoch": 2 }, { "type": "loss", "content": 0.0005048222956247628, "timestamp": "2025-09-10 02:30:16.671847", "step": 4243, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.700767", "step": 4243, "epoch": 2 }, { "type": "loss", "content": 0.03268107771873474, "timestamp": "2025-09-10 02:30:16.724174", "step": 4244, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.753375", "step": 4244, "epoch": 2 }, { "type": "loss", "content": 0.0005758335464634001, "timestamp": "2025-09-10 02:30:16.755270", "step": 4245, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.784658", "step": 4245, "epoch": 2 }, { "type": "loss", "content": 0.000780436210334301, "timestamp": "2025-09-10 02:30:16.786610", "step": 4246, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.820105", "step": 4246, "epoch": 2 }, { "type": "loss", "content": 0.03583789989352226, "timestamp": "2025-09-10 02:30:16.822404", "step": 4247, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.851476", "step": 4247, "epoch": 2 }, { "type": "loss", "content": 0.01857677660882473, "timestamp": "2025-09-10 02:30:16.874865", "step": 4248, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.905171", "step": 4248, "epoch": 2 }, { "type": "loss", "content": 0.00383751024492085, "timestamp": "2025-09-10 02:30:16.907683", "step": 4249, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.937595", "step": 4249, "epoch": 2 }, { "type": "loss", "content": 0.008934049867093563, "timestamp": "2025-09-10 02:30:16.939900", "step": 4250, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:16.970167", "step": 4250, "epoch": 2 }, { "type": "loss", "content": 0.00574998976662755, "timestamp": "2025-09-10 02:30:16.972182", "step": 4251, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:17.001387", "step": 4251, "epoch": 2 }, { "type": "loss", "content": 0.030393557623028755, "timestamp": "2025-09-10 02:30:17.024740", "step": 4252, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:17.054302", "step": 4252, "epoch": 2 }, { "type": "loss", "content": 0.006191540509462357, "timestamp": "2025-09-10 02:30:17.056165", "step": 4253, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:17.085071", "step": 4253, "epoch": 2 }, { "type": "loss", "content": 0.0160320233553648, "timestamp": "2025-09-10 02:30:17.087045", "step": 4254, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:17.116130", "step": 4254, "epoch": 2 }, { "type": "loss", "content": 0.0027475282549858093, "timestamp": "2025-09-10 02:30:17.118108", "step": 4255, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:17.146999", "step": 4255, "epoch": 2 }, { "type": "loss", "content": 0.00025519152404740453, "timestamp": "2025-09-10 02:30:17.170621", "step": 4256, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:30:19.209633", "step": 4256, "epoch": 2 }, { "type": "pplx", "content": 2666502.8418611083, "timestamp": "2025-09-10 02:30:19.214203", "step": 4256, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.242527", "step": 4256, "epoch": 2 }, { "type": "loss", "content": 0.004566500429064035, "timestamp": "2025-09-10 02:30:19.245068", "step": 4257, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.275303", "step": 4257, "epoch": 2 }, { "type": "loss", "content": 0.0001808918605092913, "timestamp": "2025-09-10 02:30:19.277146", "step": 4258, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.305919", "step": 4258, "epoch": 2 }, { "type": "loss", "content": 0.0013139968505129218, "timestamp": "2025-09-10 02:30:19.310650", "step": 4259, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:19.345186", "step": 4259, "epoch": 2 }, { "type": "loss", "content": 0.005284997168928385, "timestamp": "2025-09-10 02:30:19.368955", "step": 4260, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:19.400220", "step": 4260, "epoch": 2 }, { "type": "loss", "content": 0.005157732404768467, "timestamp": "2025-09-10 02:30:19.402305", "step": 4261, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.431676", "step": 4261, "epoch": 2 }, { "type": "loss", "content": 0.004276688676327467, "timestamp": "2025-09-10 02:30:19.433711", "step": 4262, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.463536", "step": 4262, "epoch": 2 }, { "type": "loss", "content": 0.006970508955419064, "timestamp": "2025-09-10 02:30:19.467108", "step": 4263, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.496949", "step": 4263, "epoch": 2 }, { "type": "loss", "content": 0.0032287253998219967, "timestamp": "2025-09-10 02:30:19.527243", "step": 4264, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.557984", "step": 4264, "epoch": 2 }, { "type": "loss", "content": 0.006718222517520189, "timestamp": "2025-09-10 02:30:19.559824", "step": 4265, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.588446", "step": 4265, "epoch": 2 }, { "type": "loss", "content": 0.05021723359823227, "timestamp": "2025-09-10 02:30:19.592381", "step": 4266, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.622417", "step": 4266, "epoch": 2 }, { "type": "loss", "content": 0.014577167108654976, "timestamp": "2025-09-10 02:30:19.624560", "step": 4267, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.653604", "step": 4267, "epoch": 2 }, { "type": "loss", "content": 0.006113509181886911, "timestamp": "2025-09-10 02:30:19.677278", "step": 4268, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.706570", "step": 4268, "epoch": 2 }, { "type": "loss", "content": 0.025011321529746056, "timestamp": "2025-09-10 02:30:19.709098", "step": 4269, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.741519", "step": 4269, "epoch": 2 }, { "type": "loss", "content": 0.0031119210179895163, "timestamp": "2025-09-10 02:30:19.744035", "step": 4270, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.781893", "step": 4270, "epoch": 2 }, { "type": "loss", "content": 0.0006158011383377016, "timestamp": "2025-09-10 02:30:19.784069", "step": 4271, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.813432", "step": 4271, "epoch": 2 }, { "type": "loss", "content": 0.00767070846632123, "timestamp": "2025-09-10 02:30:19.837905", "step": 4272, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.875696", "step": 4272, "epoch": 2 }, { "type": "loss", "content": 0.005195465870201588, "timestamp": "2025-09-10 02:30:19.877980", "step": 4273, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.907036", "step": 4273, "epoch": 2 }, { "type": "loss", "content": 0.021231412887573242, "timestamp": "2025-09-10 02:30:19.909450", "step": 4274, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:19.938870", "step": 4274, "epoch": 2 }, { "type": "loss", "content": 0.017993109300732613, "timestamp": "2025-09-10 02:30:19.941365", "step": 4275, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:19.977389", "step": 4275, "epoch": 2 }, { "type": "loss", "content": 0.0028719205874949694, "timestamp": "2025-09-10 02:30:20.000980", "step": 4276, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.030909", "step": 4276, "epoch": 2 }, { "type": "loss", "content": 0.0008749695844016969, "timestamp": "2025-09-10 02:30:20.032815", "step": 4277, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:20.061689", "step": 4277, "epoch": 2 }, { "type": "loss", "content": 0.05861704796552658, "timestamp": "2025-09-10 02:30:20.063723", "step": 4278, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:20.094393", "step": 4278, "epoch": 2 }, { "type": "loss", "content": 0.0005885810824111104, "timestamp": "2025-09-10 02:30:20.096630", "step": 4279, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:20.125499", "step": 4279, "epoch": 2 }, { "type": "loss", "content": 0.009852551855146885, "timestamp": "2025-09-10 02:30:20.150320", "step": 4280, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.185539", "step": 4280, "epoch": 2 }, { "type": "loss", "content": 0.0010334537364542484, "timestamp": "2025-09-10 02:30:20.187441", "step": 4281, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.219319", "step": 4281, "epoch": 2 }, { "type": "loss", "content": 0.0005859578959643841, "timestamp": "2025-09-10 02:30:20.221209", "step": 4282, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.250078", "step": 4282, "epoch": 2 }, { "type": "loss", "content": 0.01634986512362957, "timestamp": "2025-09-10 02:30:20.252211", "step": 4283, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.284667", "step": 4283, "epoch": 2 }, { "type": "loss", "content": 0.002268632873892784, "timestamp": "2025-09-10 02:30:20.308215", "step": 4284, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:20.339507", "step": 4284, "epoch": 2 }, { "type": "loss", "content": 0.0009845413733273745, "timestamp": "2025-09-10 02:30:20.341649", "step": 4285, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:20.372886", "step": 4285, "epoch": 2 }, { "type": "loss", "content": 0.00032462459057569504, "timestamp": "2025-09-10 02:30:20.374794", "step": 4286, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.405344", "step": 4286, "epoch": 2 }, { "type": "loss", "content": 0.021166039630770683, "timestamp": "2025-09-10 02:30:20.407618", "step": 4287, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.440660", "step": 4287, "epoch": 2 }, { "type": "loss", "content": 0.0007713089580647647, "timestamp": "2025-09-10 02:30:20.464792", "step": 4288, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.493892", "step": 4288, "epoch": 2 }, { "type": "loss", "content": 0.00042154494440183043, "timestamp": "2025-09-10 02:30:20.495786", "step": 4289, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.525203", "step": 4289, "epoch": 2 }, { "type": "loss", "content": 0.004018072970211506, "timestamp": "2025-09-10 02:30:20.527338", "step": 4290, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.556973", "step": 4290, "epoch": 2 }, { "type": "loss", "content": 0.00023954005155246705, "timestamp": "2025-09-10 02:30:20.559004", "step": 4291, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.588276", "step": 4291, "epoch": 2 }, { "type": "loss", "content": 0.009805425070226192, "timestamp": "2025-09-10 02:30:20.612048", "step": 4292, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.641664", "step": 4292, "epoch": 2 }, { "type": "loss", "content": 0.004415757488459349, "timestamp": "2025-09-10 02:30:20.645800", "step": 4293, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.677481", "step": 4293, "epoch": 2 }, { "type": "loss", "content": 0.0014099564868956804, "timestamp": "2025-09-10 02:30:20.680527", "step": 4294, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.709383", "step": 4294, "epoch": 2 }, { "type": "loss", "content": 0.009648052044212818, "timestamp": "2025-09-10 02:30:20.712168", "step": 4295, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.741594", "step": 4295, "epoch": 2 }, { "type": "loss", "content": 0.0007489831768907607, "timestamp": "2025-09-10 02:30:20.768093", "step": 4296, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.796886", "step": 4296, "epoch": 2 }, { "type": "loss", "content": 0.016056550666689873, "timestamp": "2025-09-10 02:30:20.799109", "step": 4297, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.828407", "step": 4297, "epoch": 2 }, { "type": "loss", "content": 0.0002837378706317395, "timestamp": "2025-09-10 02:30:20.830522", "step": 4298, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.862806", "step": 4298, "epoch": 2 }, { "type": "loss", "content": 0.017477823421359062, "timestamp": "2025-09-10 02:30:20.865409", "step": 4299, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.894602", "step": 4299, "epoch": 2 }, { "type": "loss", "content": 0.0015934448456391692, "timestamp": "2025-09-10 02:30:20.919782", "step": 4300, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.951413", "step": 4300, "epoch": 2 }, { "type": "loss", "content": 0.02498321607708931, "timestamp": "2025-09-10 02:30:20.954282", "step": 4301, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:20.986608", "step": 4301, "epoch": 2 }, { "type": "loss", "content": 0.0045504337176680565, "timestamp": "2025-09-10 02:30:20.988576", "step": 4302, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:21.017450", "step": 4302, "epoch": 2 }, { "type": "loss", "content": 0.0029134657233953476, "timestamp": "2025-09-10 02:30:21.019562", "step": 4303, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.054943", "step": 4303, "epoch": 2 }, { "type": "loss", "content": 0.006049790419638157, "timestamp": "2025-09-10 02:30:21.079094", "step": 4304, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.109704", "step": 4304, "epoch": 2 }, { "type": "loss", "content": 0.005422768648713827, "timestamp": "2025-09-10 02:30:21.111587", "step": 4305, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.140923", "step": 4305, "epoch": 2 }, { "type": "loss", "content": 0.019810587167739868, "timestamp": "2025-09-10 02:30:21.143115", "step": 4306, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.172567", "step": 4306, "epoch": 2 }, { "type": "loss", "content": 0.00017135367670562118, "timestamp": "2025-09-10 02:30:21.178457", "step": 4307, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:21.207927", "step": 4307, "epoch": 2 }, { "type": "loss", "content": 0.01794002763926983, "timestamp": "2025-09-10 02:30:21.231693", "step": 4308, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:21.261949", "step": 4308, "epoch": 2 }, { "type": "loss", "content": 0.0010666154557839036, "timestamp": "2025-09-10 02:30:21.264591", "step": 4309, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.295011", "step": 4309, "epoch": 2 }, { "type": "loss", "content": 0.009481683373451233, "timestamp": "2025-09-10 02:30:21.297113", "step": 4310, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:21.326376", "step": 4310, "epoch": 2 }, { "type": "loss", "content": 0.004097465891391039, "timestamp": "2025-09-10 02:30:21.329351", "step": 4311, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:21.358503", "step": 4311, "epoch": 2 }, { "type": "loss", "content": 0.004019964952021837, "timestamp": "2025-09-10 02:30:21.381963", "step": 4312, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.411944", "step": 4312, "epoch": 2 }, { "type": "loss", "content": 0.000150880150613375, "timestamp": "2025-09-10 02:30:21.414087", "step": 4313, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.443170", "step": 4313, "epoch": 2 }, { "type": "loss", "content": 0.00041646347381174564, "timestamp": "2025-09-10 02:30:21.445282", "step": 4314, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:21.473991", "step": 4314, "epoch": 2 }, { "type": "loss", "content": 0.0008233272237703204, "timestamp": "2025-09-10 02:30:21.476199", "step": 4315, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.505209", "step": 4315, "epoch": 2 }, { "type": "loss", "content": 0.004062996245920658, "timestamp": "2025-09-10 02:30:21.528556", "step": 4316, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.560246", "step": 4316, "epoch": 2 }, { "type": "loss", "content": 0.030979419127106667, "timestamp": "2025-09-10 02:30:21.564195", "step": 4317, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.593114", "step": 4317, "epoch": 2 }, { "type": "loss", "content": 0.00021808150631841272, "timestamp": "2025-09-10 02:30:21.595284", "step": 4318, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.624563", "step": 4318, "epoch": 2 }, { "type": "loss", "content": 0.0003848371852654964, "timestamp": "2025-09-10 02:30:21.626975", "step": 4319, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.662543", "step": 4319, "epoch": 2 }, { "type": "loss", "content": 0.005721138324588537, "timestamp": "2025-09-10 02:30:21.685839", "step": 4320, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.717253", "step": 4320, "epoch": 2 }, { "type": "loss", "content": 0.018453901633620262, "timestamp": "2025-09-10 02:30:21.719565", "step": 4321, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.749004", "step": 4321, "epoch": 2 }, { "type": "loss", "content": 0.0017978992545977235, "timestamp": "2025-09-10 02:30:21.753362", "step": 4322, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.783429", "step": 4322, "epoch": 2 }, { "type": "loss", "content": 0.038310449570417404, "timestamp": "2025-09-10 02:30:21.791944", "step": 4323, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:21.823634", "step": 4323, "epoch": 2 }, { "type": "loss", "content": 0.009124137461185455, "timestamp": "2025-09-10 02:30:21.847332", "step": 4324, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.877476", "step": 4324, "epoch": 2 }, { "type": "loss", "content": 0.016482409089803696, "timestamp": "2025-09-10 02:30:21.879859", "step": 4325, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.909288", "step": 4325, "epoch": 2 }, { "type": "loss", "content": 0.0018404892180114985, "timestamp": "2025-09-10 02:30:21.912990", "step": 4326, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:21.943616", "step": 4326, "epoch": 2 }, { "type": "loss", "content": 0.01833636499941349, "timestamp": "2025-09-10 02:30:21.945728", "step": 4327, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:21.975186", "step": 4327, "epoch": 2 }, { "type": "loss", "content": 0.008851002901792526, "timestamp": "2025-09-10 02:30:21.998697", "step": 4328, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.027828", "step": 4328, "epoch": 2 }, { "type": "loss", "content": 0.003933702129870653, "timestamp": "2025-09-10 02:30:22.032762", "step": 4329, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.065524", "step": 4329, "epoch": 2 }, { "type": "loss", "content": 0.04179712384939194, "timestamp": "2025-09-10 02:30:22.068944", "step": 4330, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.097699", "step": 4330, "epoch": 2 }, { "type": "loss", "content": 0.00019713399524334818, "timestamp": "2025-09-10 02:30:22.099843", "step": 4331, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.128888", "step": 4331, "epoch": 2 }, { "type": "loss", "content": 0.002089699497446418, "timestamp": "2025-09-10 02:30:22.154230", "step": 4332, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.183176", "step": 4332, "epoch": 2 }, { "type": "loss", "content": 0.005560423247516155, "timestamp": "2025-09-10 02:30:22.185334", "step": 4333, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.215316", "step": 4333, "epoch": 2 }, { "type": "loss", "content": 0.0005679992027580738, "timestamp": "2025-09-10 02:30:22.217848", "step": 4334, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:22.249359", "step": 4334, "epoch": 2 }, { "type": "loss", "content": 0.004473458975553513, "timestamp": "2025-09-10 02:30:22.251589", "step": 4335, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.281518", "step": 4335, "epoch": 2 }, { "type": "loss", "content": 0.0069739497266709805, "timestamp": "2025-09-10 02:30:22.305350", "step": 4336, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.335020", "step": 4336, "epoch": 2 }, { "type": "loss", "content": 0.0070192874409258366, "timestamp": "2025-09-10 02:30:22.337826", "step": 4337, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.366271", "step": 4337, "epoch": 2 }, { "type": "loss", "content": 0.0038986883591860533, "timestamp": "2025-09-10 02:30:22.384092", "step": 4338, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.415490", "step": 4338, "epoch": 2 }, { "type": "loss", "content": 0.012104692868888378, "timestamp": "2025-09-10 02:30:22.419742", "step": 4339, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:22.453813", "step": 4339, "epoch": 2 }, { "type": "loss", "content": 0.030272994190454483, "timestamp": "2025-09-10 02:30:22.477229", "step": 4340, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.509914", "step": 4340, "epoch": 2 }, { "type": "loss", "content": 0.002849965589120984, "timestamp": "2025-09-10 02:30:22.511765", "step": 4341, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:22.540016", "step": 4341, "epoch": 2 }, { "type": "loss", "content": 0.009622319601476192, "timestamp": "2025-09-10 02:30:22.542150", "step": 4342, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.572690", "step": 4342, "epoch": 2 }, { "type": "loss", "content": 0.0004333446267992258, "timestamp": "2025-09-10 02:30:22.574843", "step": 4343, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.630401", "step": 4343, "epoch": 2 }, { "type": "loss", "content": 0.0009035724797286093, "timestamp": "2025-09-10 02:30:22.653933", "step": 4344, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:22.684107", "step": 4344, "epoch": 2 }, { "type": "loss", "content": 0.003374104155227542, "timestamp": "2025-09-10 02:30:22.686961", "step": 4345, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:22.716053", "step": 4345, "epoch": 2 }, { "type": "loss", "content": 0.04673586040735245, "timestamp": "2025-09-10 02:30:22.719010", "step": 4346, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.749352", "step": 4346, "epoch": 2 }, { "type": "loss", "content": 0.05751964822411537, "timestamp": "2025-09-10 02:30:22.752578", "step": 4347, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.783129", "step": 4347, "epoch": 2 }, { "type": "loss", "content": 0.0013447256060317159, "timestamp": "2025-09-10 02:30:22.806782", "step": 4348, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.837528", "step": 4348, "epoch": 2 }, { "type": "loss", "content": 0.016711339354515076, "timestamp": "2025-09-10 02:30:22.839666", "step": 4349, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.871115", "step": 4349, "epoch": 2 }, { "type": "loss", "content": 0.0014064701972529292, "timestamp": "2025-09-10 02:30:22.873325", "step": 4350, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.906807", "step": 4350, "epoch": 2 }, { "type": "loss", "content": 0.0013334174873307347, "timestamp": "2025-09-10 02:30:22.908840", "step": 4351, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:22.938415", "step": 4351, "epoch": 2 }, { "type": "loss", "content": 0.0467769019305706, "timestamp": "2025-09-10 02:30:22.965037", "step": 4352, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.005987", "step": 4352, "epoch": 2 }, { "type": "loss", "content": 0.0005183253088034689, "timestamp": "2025-09-10 02:30:23.008119", "step": 4353, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:23.037685", "step": 4353, "epoch": 2 }, { "type": "loss", "content": 0.0013273023068904877, "timestamp": "2025-09-10 02:30:23.039665", "step": 4354, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.070285", "step": 4354, "epoch": 2 }, { "type": "loss", "content": 0.00047609535977244377, "timestamp": "2025-09-10 02:30:23.073785", "step": 4355, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.105178", "step": 4355, "epoch": 2 }, { "type": "loss", "content": 0.011602640151977539, "timestamp": "2025-09-10 02:30:23.130111", "step": 4356, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.160621", "step": 4356, "epoch": 2 }, { "type": "loss", "content": 0.01510648149996996, "timestamp": "2025-09-10 02:30:23.162606", "step": 4357, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:23.190891", "step": 4357, "epoch": 2 }, { "type": "loss", "content": 0.0038530982565134764, "timestamp": "2025-09-10 02:30:23.192880", "step": 4358, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.221886", "step": 4358, "epoch": 2 }, { "type": "loss", "content": 0.003956594504415989, "timestamp": "2025-09-10 02:30:23.224020", "step": 4359, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.252428", "step": 4359, "epoch": 2 }, { "type": "loss", "content": 0.006590021308511496, "timestamp": "2025-09-10 02:30:23.276409", "step": 4360, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.305598", "step": 4360, "epoch": 2 }, { "type": "loss", "content": 0.015391605906188488, "timestamp": "2025-09-10 02:30:23.308032", "step": 4361, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.337262", "step": 4361, "epoch": 2 }, { "type": "loss", "content": 0.0010404250351712108, "timestamp": "2025-09-10 02:30:23.339418", "step": 4362, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.370395", "step": 4362, "epoch": 2 }, { "type": "loss", "content": 0.0009703827672637999, "timestamp": "2025-09-10 02:30:23.372249", "step": 4363, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.402168", "step": 4363, "epoch": 2 }, { "type": "loss", "content": 0.030420567840337753, "timestamp": "2025-09-10 02:30:23.426540", "step": 4364, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:23.456881", "step": 4364, "epoch": 2 }, { "type": "loss", "content": 0.027547702193260193, "timestamp": "2025-09-10 02:30:23.459276", "step": 4365, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.488358", "step": 4365, "epoch": 2 }, { "type": "loss", "content": 0.04904058575630188, "timestamp": "2025-09-10 02:30:23.490640", "step": 4366, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.520113", "step": 4366, "epoch": 2 }, { "type": "loss", "content": 0.0006052871467545629, "timestamp": "2025-09-10 02:30:23.522319", "step": 4367, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.552152", "step": 4367, "epoch": 2 }, { "type": "loss", "content": 0.002918403595685959, "timestamp": "2025-09-10 02:30:23.576007", "step": 4368, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.607395", "step": 4368, "epoch": 2 }, { "type": "loss", "content": 0.0007114603067748249, "timestamp": "2025-09-10 02:30:23.609435", "step": 4369, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.639204", "step": 4369, "epoch": 2 }, { "type": "loss", "content": 0.0027193163987249136, "timestamp": "2025-09-10 02:30:23.641485", "step": 4370, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.672831", "step": 4370, "epoch": 2 }, { "type": "loss", "content": 0.00400652876123786, "timestamp": "2025-09-10 02:30:23.674952", "step": 4371, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.706334", "step": 4371, "epoch": 2 }, { "type": "loss", "content": 0.03902292251586914, "timestamp": "2025-09-10 02:30:23.730285", "step": 4372, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.771811", "step": 4372, "epoch": 2 }, { "type": "loss", "content": 0.002066724468022585, "timestamp": "2025-09-10 02:30:23.773573", "step": 4373, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.803950", "step": 4373, "epoch": 2 }, { "type": "loss", "content": 0.011587983928620815, "timestamp": "2025-09-10 02:30:23.806137", "step": 4374, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.835604", "step": 4374, "epoch": 2 }, { "type": "loss", "content": 0.033676352351903915, "timestamp": "2025-09-10 02:30:23.840345", "step": 4375, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.870719", "step": 4375, "epoch": 2 }, { "type": "loss", "content": 0.01650477945804596, "timestamp": "2025-09-10 02:30:23.894287", "step": 4376, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:23.923570", "step": 4376, "epoch": 2 }, { "type": "loss", "content": 0.0012162269558757544, "timestamp": "2025-09-10 02:30:23.925507", "step": 4377, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:23.965931", "step": 4377, "epoch": 2 }, { "type": "loss", "content": 0.04293219372630119, "timestamp": "2025-09-10 02:30:23.967876", "step": 4378, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:23.996689", "step": 4378, "epoch": 2 }, { "type": "loss", "content": 0.01945372484624386, "timestamp": "2025-09-10 02:30:23.998955", "step": 4379, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:24.028331", "step": 4379, "epoch": 2 }, { "type": "loss", "content": 0.0012687245616689324, "timestamp": "2025-09-10 02:30:24.054584", "step": 4380, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:24.091721", "step": 4380, "epoch": 2 }, { "type": "loss", "content": 0.03918248787522316, "timestamp": "2025-09-10 02:30:24.095187", "step": 4381, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:24.125416", "step": 4381, "epoch": 2 }, { "type": "loss", "content": 0.007243589963763952, "timestamp": "2025-09-10 02:30:24.127564", "step": 4382, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.156871", "step": 4382, "epoch": 2 }, { "type": "loss", "content": 0.00038471867446787655, "timestamp": "2025-09-10 02:30:24.159070", "step": 4383, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.188233", "step": 4383, "epoch": 2 }, { "type": "loss", "content": 0.006663180887699127, "timestamp": "2025-09-10 02:30:24.212548", "step": 4384, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:24.242870", "step": 4384, "epoch": 2 }, { "type": "loss", "content": 0.005792879965156317, "timestamp": "2025-09-10 02:30:24.245016", "step": 4385, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.274190", "step": 4385, "epoch": 2 }, { "type": "loss", "content": 0.016859596595168114, "timestamp": "2025-09-10 02:30:24.276207", "step": 4386, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:24.305400", "step": 4386, "epoch": 2 }, { "type": "loss", "content": 0.020246658474206924, "timestamp": "2025-09-10 02:30:24.307193", "step": 4387, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:24.336345", "step": 4387, "epoch": 2 }, { "type": "loss", "content": 0.01099295262247324, "timestamp": "2025-09-10 02:30:24.360108", "step": 4388, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.389583", "step": 4388, "epoch": 2 }, { "type": "loss", "content": 0.0008660271996632218, "timestamp": "2025-09-10 02:30:24.391765", "step": 4389, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.421318", "step": 4389, "epoch": 2 }, { "type": "loss", "content": 0.026865771040320396, "timestamp": "2025-09-10 02:30:24.423407", "step": 4390, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:24.452669", "step": 4390, "epoch": 2 }, { "type": "loss", "content": 0.05440434068441391, "timestamp": "2025-09-10 02:30:24.456462", "step": 4391, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.487706", "step": 4391, "epoch": 2 }, { "type": "loss", "content": 0.04076675325632095, "timestamp": "2025-09-10 02:30:24.511080", "step": 4392, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:24.540355", "step": 4392, "epoch": 2 }, { "type": "loss", "content": 0.012472431175410748, "timestamp": "2025-09-10 02:30:24.542405", "step": 4393, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.571483", "step": 4393, "epoch": 2 }, { "type": "loss", "content": 0.015914931893348694, "timestamp": "2025-09-10 02:30:24.575409", "step": 4394, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:24.608575", "step": 4394, "epoch": 2 }, { "type": "loss", "content": 0.0016718072583898902, "timestamp": "2025-09-10 02:30:24.613324", "step": 4395, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.642871", "step": 4395, "epoch": 2 }, { "type": "loss", "content": 0.0273450817912817, "timestamp": "2025-09-10 02:30:24.670084", "step": 4396, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.705022", "step": 4396, "epoch": 2 }, { "type": "loss", "content": 0.02937299944460392, "timestamp": "2025-09-10 02:30:24.707243", "step": 4397, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.736701", "step": 4397, "epoch": 2 }, { "type": "loss", "content": 0.018649373203516006, "timestamp": "2025-09-10 02:30:24.739354", "step": 4398, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.769007", "step": 4398, "epoch": 2 }, { "type": "loss", "content": 0.0013912491267547011, "timestamp": "2025-09-10 02:30:24.770836", "step": 4399, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:24.799806", "step": 4399, "epoch": 2 }, { "type": "loss", "content": 0.007849449291825294, "timestamp": "2025-09-10 02:30:24.824993", "step": 4400, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.854540", "step": 4400, "epoch": 2 }, { "type": "loss", "content": 0.00767852645367384, "timestamp": "2025-09-10 02:30:24.856739", "step": 4401, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.887985", "step": 4401, "epoch": 2 }, { "type": "loss", "content": 0.018767986446619034, "timestamp": "2025-09-10 02:30:24.889897", "step": 4402, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.918697", "step": 4402, "epoch": 2 }, { "type": "loss", "content": 0.02546873316168785, "timestamp": "2025-09-10 02:30:24.920829", "step": 4403, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:24.949982", "step": 4403, "epoch": 2 }, { "type": "loss", "content": 0.007949613966047764, "timestamp": "2025-09-10 02:30:24.973518", "step": 4404, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:25.002946", "step": 4404, "epoch": 2 }, { "type": "loss", "content": 0.010820974595844746, "timestamp": "2025-09-10 02:30:25.006193", "step": 4405, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:25.040733", "step": 4405, "epoch": 2 }, { "type": "loss", "content": 0.013274082913994789, "timestamp": "2025-09-10 02:30:25.042839", "step": 4406, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:25.074017", "step": 4406, "epoch": 2 }, { "type": "loss", "content": 0.029004592448472977, "timestamp": "2025-09-10 02:30:25.075920", "step": 4407, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:25.104443", "step": 4407, "epoch": 2 }, { "type": "loss", "content": 0.04145583137869835, "timestamp": "2025-09-10 02:30:25.128303", "step": 4408, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:30:27.121558", "step": 4408, "epoch": 2 }, { "type": "pplx", "content": 2366995.1777753704, "timestamp": "2025-09-10 02:30:27.123556", "step": 4408, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.151440", "step": 4408, "epoch": 2 }, { "type": "loss", "content": 0.007996621541678905, "timestamp": "2025-09-10 02:30:27.153505", "step": 4409, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.183492", "step": 4409, "epoch": 2 }, { "type": "loss", "content": 0.01478519756346941, "timestamp": "2025-09-10 02:30:27.185595", "step": 4410, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.215118", "step": 4410, "epoch": 2 }, { "type": "loss", "content": 0.016848096624016762, "timestamp": "2025-09-10 02:30:27.217141", "step": 4411, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.246691", "step": 4411, "epoch": 2 }, { "type": "loss", "content": 0.08180829137563705, "timestamp": "2025-09-10 02:30:27.270446", "step": 4412, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:27.299913", "step": 4412, "epoch": 2 }, { "type": "loss", "content": 0.014533833600580692, "timestamp": "2025-09-10 02:30:27.301939", "step": 4413, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.330877", "step": 4413, "epoch": 2 }, { "type": "loss", "content": 0.012547485530376434, "timestamp": "2025-09-10 02:30:27.333010", "step": 4414, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.362564", "step": 4414, "epoch": 2 }, { "type": "loss", "content": 0.03431883081793785, "timestamp": "2025-09-10 02:30:27.364594", "step": 4415, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.393735", "step": 4415, "epoch": 2 }, { "type": "loss", "content": 0.001154187018983066, "timestamp": "2025-09-10 02:30:27.417466", "step": 4416, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:27.446568", "step": 4416, "epoch": 2 }, { "type": "loss", "content": 0.047597140073776245, "timestamp": "2025-09-10 02:30:27.448783", "step": 4417, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:27.477889", "step": 4417, "epoch": 2 }, { "type": "loss", "content": 0.024733979254961014, "timestamp": "2025-09-10 02:30:27.479782", "step": 4418, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.508554", "step": 4418, "epoch": 2 }, { "type": "loss", "content": 0.013838161714375019, "timestamp": "2025-09-10 02:30:27.510667", "step": 4419, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.539714", "step": 4419, "epoch": 2 }, { "type": "loss", "content": 0.005546521861106157, "timestamp": "2025-09-10 02:30:27.563379", "step": 4420, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.592702", "step": 4420, "epoch": 2 }, { "type": "loss", "content": 0.008822837844491005, "timestamp": "2025-09-10 02:30:27.594546", "step": 4421, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.623827", "step": 4421, "epoch": 2 }, { "type": "loss", "content": 0.04214005544781685, "timestamp": "2025-09-10 02:30:27.626120", "step": 4422, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.655457", "step": 4422, "epoch": 2 }, { "type": "loss", "content": 0.02608082816004753, "timestamp": "2025-09-10 02:30:27.657492", "step": 4423, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.687543", "step": 4423, "epoch": 2 }, { "type": "loss", "content": 0.03527413681149483, "timestamp": "2025-09-10 02:30:27.710937", "step": 4424, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.740125", "step": 4424, "epoch": 2 }, { "type": "loss", "content": 0.009323501959443092, "timestamp": "2025-09-10 02:30:27.742325", "step": 4425, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.771628", "step": 4425, "epoch": 2 }, { "type": "loss", "content": 0.007076173089444637, "timestamp": "2025-09-10 02:30:27.775053", "step": 4426, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:27.804117", "step": 4426, "epoch": 2 }, { "type": "loss", "content": 0.014898846857249737, "timestamp": "2025-09-10 02:30:27.805960", "step": 4427, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.835122", "step": 4427, "epoch": 2 }, { "type": "loss", "content": 0.0012923700269311666, "timestamp": "2025-09-10 02:30:27.858521", "step": 4428, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.888869", "step": 4428, "epoch": 2 }, { "type": "loss", "content": 0.0009464005706831813, "timestamp": "2025-09-10 02:30:27.890954", "step": 4429, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.920388", "step": 4429, "epoch": 2 }, { "type": "loss", "content": 0.03467162325978279, "timestamp": "2025-09-10 02:30:27.922292", "step": 4430, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.952217", "step": 4430, "epoch": 2 }, { "type": "loss", "content": 0.0009034690447151661, "timestamp": "2025-09-10 02:30:27.954488", "step": 4431, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:27.984392", "step": 4431, "epoch": 2 }, { "type": "loss", "content": 0.004393530543893576, "timestamp": "2025-09-10 02:30:28.007999", "step": 4432, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.038094", "step": 4432, "epoch": 2 }, { "type": "loss", "content": 0.020813841372728348, "timestamp": "2025-09-10 02:30:28.039780", "step": 4433, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:28.068526", "step": 4433, "epoch": 2 }, { "type": "loss", "content": 0.0029184853192418814, "timestamp": "2025-09-10 02:30:28.070686", "step": 4434, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.099692", "step": 4434, "epoch": 2 }, { "type": "loss", "content": 0.018276767805218697, "timestamp": "2025-09-10 02:30:28.101828", "step": 4435, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.130932", "step": 4435, "epoch": 2 }, { "type": "loss", "content": 0.013618439435958862, "timestamp": "2025-09-10 02:30:28.154633", "step": 4436, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.184012", "step": 4436, "epoch": 2 }, { "type": "loss", "content": 0.01371348462998867, "timestamp": "2025-09-10 02:30:28.185963", "step": 4437, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:28.215496", "step": 4437, "epoch": 2 }, { "type": "loss", "content": 0.010511423461139202, "timestamp": "2025-09-10 02:30:28.217561", "step": 4438, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.247095", "step": 4438, "epoch": 2 }, { "type": "loss", "content": 0.016875606030225754, "timestamp": "2025-09-10 02:30:28.249478", "step": 4439, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.278798", "step": 4439, "epoch": 2 }, { "type": "loss", "content": 0.037644706666469574, "timestamp": "2025-09-10 02:30:28.302399", "step": 4440, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:28.332021", "step": 4440, "epoch": 2 }, { "type": "loss", "content": 0.014547375962138176, "timestamp": "2025-09-10 02:30:28.334181", "step": 4441, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.363933", "step": 4441, "epoch": 2 }, { "type": "loss", "content": 0.02772408537566662, "timestamp": "2025-09-10 02:30:28.365908", "step": 4442, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:28.395389", "step": 4442, "epoch": 2 }, { "type": "loss", "content": 0.00911302026361227, "timestamp": "2025-09-10 02:30:28.397521", "step": 4443, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.426455", "step": 4443, "epoch": 2 }, { "type": "loss", "content": 0.01764538884162903, "timestamp": "2025-09-10 02:30:28.450155", "step": 4444, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.479872", "step": 4444, "epoch": 2 }, { "type": "loss", "content": 0.013143041171133518, "timestamp": "2025-09-10 02:30:28.481596", "step": 4445, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:28.510649", "step": 4445, "epoch": 2 }, { "type": "loss", "content": 0.025145303457975388, "timestamp": "2025-09-10 02:30:28.512901", "step": 4446, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:28.542010", "step": 4446, "epoch": 2 }, { "type": "loss", "content": 0.0038895097095519304, "timestamp": "2025-09-10 02:30:28.544178", "step": 4447, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.574420", "step": 4447, "epoch": 2 }, { "type": "loss", "content": 0.005414228420704603, "timestamp": "2025-09-10 02:30:28.598255", "step": 4448, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:28.627847", "step": 4448, "epoch": 2 }, { "type": "loss", "content": 0.008694284595549107, "timestamp": "2025-09-10 02:30:28.629922", "step": 4449, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:28.658963", "step": 4449, "epoch": 2 }, { "type": "loss", "content": 0.015848910436034203, "timestamp": "2025-09-10 02:30:28.661076", "step": 4450, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.690141", "step": 4450, "epoch": 2 }, { "type": "loss", "content": 0.0003486702044028789, "timestamp": "2025-09-10 02:30:28.692275", "step": 4451, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.721573", "step": 4451, "epoch": 2 }, { "type": "loss", "content": 0.006905894260853529, "timestamp": "2025-09-10 02:30:28.745163", "step": 4452, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.774920", "step": 4452, "epoch": 2 }, { "type": "loss", "content": 0.0024056038819253445, "timestamp": "2025-09-10 02:30:28.777049", "step": 4453, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.806242", "step": 4453, "epoch": 2 }, { "type": "loss", "content": 0.0023369495756924152, "timestamp": "2025-09-10 02:30:28.808317", "step": 4454, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.837443", "step": 4454, "epoch": 2 }, { "type": "loss", "content": 0.006998989265412092, "timestamp": "2025-09-10 02:30:28.839539", "step": 4455, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.868436", "step": 4455, "epoch": 2 }, { "type": "loss", "content": 0.010632582008838654, "timestamp": "2025-09-10 02:30:28.891870", "step": 4456, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.920993", "step": 4456, "epoch": 2 }, { "type": "loss", "content": 0.004489853512495756, "timestamp": "2025-09-10 02:30:28.922815", "step": 4457, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.951863", "step": 4457, "epoch": 2 }, { "type": "loss", "content": 0.010355109348893166, "timestamp": "2025-09-10 02:30:28.953999", "step": 4458, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:28.983310", "step": 4458, "epoch": 2 }, { "type": "loss", "content": 0.010508539155125618, "timestamp": "2025-09-10 02:30:28.985297", "step": 4459, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.015560", "step": 4459, "epoch": 2 }, { "type": "loss", "content": 0.010677381418645382, "timestamp": "2025-09-10 02:30:29.039389", "step": 4460, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.068827", "step": 4460, "epoch": 2 }, { "type": "loss", "content": 0.0026452091988176107, "timestamp": "2025-09-10 02:30:29.071106", "step": 4461, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.101273", "step": 4461, "epoch": 2 }, { "type": "loss", "content": 0.0003370894701220095, "timestamp": "2025-09-10 02:30:29.103329", "step": 4462, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.132808", "step": 4462, "epoch": 2 }, { "type": "loss", "content": 0.005529838614165783, "timestamp": "2025-09-10 02:30:29.134900", "step": 4463, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.164009", "step": 4463, "epoch": 2 }, { "type": "loss", "content": 0.009987021796405315, "timestamp": "2025-09-10 02:30:29.187720", "step": 4464, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.216988", "step": 4464, "epoch": 2 }, { "type": "loss", "content": 0.029712708666920662, "timestamp": "2025-09-10 02:30:29.219119", "step": 4465, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.248201", "step": 4465, "epoch": 2 }, { "type": "loss", "content": 0.0027898624539375305, "timestamp": "2025-09-10 02:30:29.250252", "step": 4466, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.279434", "step": 4466, "epoch": 2 }, { "type": "loss", "content": 0.03034038282930851, "timestamp": "2025-09-10 02:30:29.281386", "step": 4467, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:29.310856", "step": 4467, "epoch": 2 }, { "type": "loss", "content": 0.019655631855130196, "timestamp": "2025-09-10 02:30:29.334421", "step": 4468, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.364090", "step": 4468, "epoch": 2 }, { "type": "loss", "content": 0.007132671773433685, "timestamp": "2025-09-10 02:30:29.366049", "step": 4469, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.395033", "step": 4469, "epoch": 2 }, { "type": "loss", "content": 0.008172462694346905, "timestamp": "2025-09-10 02:30:29.397137", "step": 4470, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.426163", "step": 4470, "epoch": 2 }, { "type": "loss", "content": 0.010540196672081947, "timestamp": "2025-09-10 02:30:29.428029", "step": 4471, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.458043", "step": 4471, "epoch": 2 }, { "type": "loss", "content": 0.019368048757314682, "timestamp": "2025-09-10 02:30:29.481897", "step": 4472, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.511612", "step": 4472, "epoch": 2 }, { "type": "loss", "content": 0.0010160019155591726, "timestamp": "2025-09-10 02:30:29.513703", "step": 4473, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:29.543244", "step": 4473, "epoch": 2 }, { "type": "loss", "content": 0.0021432091016322374, "timestamp": "2025-09-10 02:30:29.545374", "step": 4474, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.575057", "step": 4474, "epoch": 2 }, { "type": "loss", "content": 0.0005147705087438226, "timestamp": "2025-09-10 02:30:29.577302", "step": 4475, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.606609", "step": 4475, "epoch": 2 }, { "type": "loss", "content": 0.0011651624226942658, "timestamp": "2025-09-10 02:30:29.630465", "step": 4476, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:29.661370", "step": 4476, "epoch": 2 }, { "type": "loss", "content": 0.038272712379693985, "timestamp": "2025-09-10 02:30:29.663418", "step": 4477, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.693789", "step": 4477, "epoch": 2 }, { "type": "loss", "content": 0.01615484617650509, "timestamp": "2025-09-10 02:30:29.695889", "step": 4478, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.725712", "step": 4478, "epoch": 2 }, { "type": "loss", "content": 0.07328778505325317, "timestamp": "2025-09-10 02:30:29.727805", "step": 4479, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:29.757694", "step": 4479, "epoch": 2 }, { "type": "loss", "content": 0.00021645925880875438, "timestamp": "2025-09-10 02:30:29.781335", "step": 4480, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.811439", "step": 4480, "epoch": 2 }, { "type": "loss", "content": 0.0031695568468421698, "timestamp": "2025-09-10 02:30:29.813536", "step": 4481, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.842905", "step": 4481, "epoch": 2 }, { "type": "loss", "content": 0.005539781413972378, "timestamp": "2025-09-10 02:30:29.845036", "step": 4482, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.873992", "step": 4482, "epoch": 2 }, { "type": "loss", "content": 0.00024167521041817963, "timestamp": "2025-09-10 02:30:29.875968", "step": 4483, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.905330", "step": 4483, "epoch": 2 }, { "type": "loss", "content": 0.007639687974005938, "timestamp": "2025-09-10 02:30:29.929051", "step": 4484, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.958031", "step": 4484, "epoch": 2 }, { "type": "loss", "content": 0.01753472536802292, "timestamp": "2025-09-10 02:30:29.960094", "step": 4485, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:29.989210", "step": 4485, "epoch": 2 }, { "type": "loss", "content": 0.0038788598030805588, "timestamp": "2025-09-10 02:30:29.991038", "step": 4486, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:30.020193", "step": 4486, "epoch": 2 }, { "type": "loss", "content": 0.012585505843162537, "timestamp": "2025-09-10 02:30:30.022320", "step": 4487, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:30.051566", "step": 4487, "epoch": 2 }, { "type": "loss", "content": 0.0010783494217321277, "timestamp": "2025-09-10 02:30:30.075099", "step": 4488, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:30.105465", "step": 4488, "epoch": 2 }, { "type": "loss", "content": 0.0025533498264849186, "timestamp": "2025-09-10 02:30:30.107544", "step": 4489, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:30.137397", "step": 4489, "epoch": 2 }, { "type": "loss", "content": 0.01380517240613699, "timestamp": "2025-09-10 02:30:30.139623", "step": 4490, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:30.170262", "step": 4490, "epoch": 2 }, { "type": "loss", "content": 0.0004282824811525643, "timestamp": "2025-09-10 02:30:30.172093", "step": 4491, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:30.201297", "step": 4491, "epoch": 2 }, { "type": "loss", "content": 0.00017427995044272393, "timestamp": "2025-09-10 02:30:30.225037", "step": 4492, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:30.254467", "step": 4492, "epoch": 2 }, { "type": "loss", "content": 0.011240904219448566, "timestamp": "2025-09-10 02:30:30.256545", "step": 4493, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:30.285132", "step": 4493, "epoch": 2 }, { "type": "loss", "content": 0.002480054972693324, "timestamp": "2025-09-10 02:30:30.286814", "step": 4494, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:30.315857", "step": 4494, "epoch": 2 }, { "type": "loss", "content": 0.000373142451280728, "timestamp": "2025-09-10 02:30:30.317778", "step": 4495, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:30.347201", "step": 4495, "epoch": 2 }, { "type": "loss", "content": 0.00369776482693851, "timestamp": "2025-09-10 02:30:30.370586", "step": 4496, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:30.402492", "step": 4496, "epoch": 2 }, { "type": "loss", "content": 0.0031046585645526648, "timestamp": "2025-09-10 02:30:30.404344", "step": 4497, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:30.432921", "step": 4497, "epoch": 2 }, { "type": "loss", "content": 0.0007371109095402062, "timestamp": "2025-09-10 02:30:30.434975", "step": 4498, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:30.463916", "step": 4498, "epoch": 2 }, { "type": "loss", "content": 0.0029690249357372522, "timestamp": "2025-09-10 02:30:30.465804", "step": 4499, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:30.494440", "step": 4499, "epoch": 2 }, { "type": "loss", "content": 0.032537732273340225, "timestamp": "2025-09-10 02:30:30.517796", "step": 4500, "epoch": 2 }, { "type": "info", "content": "Checkpoint saved at step 4500", "timestamp": "2025-09-10 02:30:34.944486", "step": 4500, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:34.982290", "step": 4500, "epoch": 2 }, { "type": "loss", "content": 0.00028259860118851066, "timestamp": "2025-09-10 02:30:34.984380", "step": 4501, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.014468", "step": 4501, "epoch": 2 }, { "type": "loss", "content": 0.006036944221705198, "timestamp": "2025-09-10 02:30:35.016298", "step": 4502, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.045894", "step": 4502, "epoch": 2 }, { "type": "loss", "content": 0.00026221523876301944, "timestamp": "2025-09-10 02:30:35.047980", "step": 4503, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.077275", "step": 4503, "epoch": 2 }, { "type": "loss", "content": 0.0023497173096984625, "timestamp": "2025-09-10 02:30:35.100901", "step": 4504, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.130356", "step": 4504, "epoch": 2 }, { "type": "loss", "content": 0.04136881232261658, "timestamp": "2025-09-10 02:30:35.132147", "step": 4505, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:35.161923", "step": 4505, "epoch": 2 }, { "type": "loss", "content": 0.008326425217092037, "timestamp": "2025-09-10 02:30:35.163712", "step": 4506, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.192416", "step": 4506, "epoch": 2 }, { "type": "loss", "content": 0.0009407126344740391, "timestamp": "2025-09-10 02:30:35.194137", "step": 4507, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.222615", "step": 4507, "epoch": 2 }, { "type": "loss", "content": 0.011470125056803226, "timestamp": "2025-09-10 02:30:35.246364", "step": 4508, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.275353", "step": 4508, "epoch": 2 }, { "type": "loss", "content": 0.045114580541849136, "timestamp": "2025-09-10 02:30:35.277229", "step": 4509, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.306439", "step": 4509, "epoch": 2 }, { "type": "loss", "content": 0.016461007297039032, "timestamp": "2025-09-10 02:30:35.308324", "step": 4510, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:35.337288", "step": 4510, "epoch": 2 }, { "type": "loss", "content": 0.0015779578825458884, "timestamp": "2025-09-10 02:30:35.339222", "step": 4511, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.368805", "step": 4511, "epoch": 2 }, { "type": "loss", "content": 0.0010573251638561487, "timestamp": "2025-09-10 02:30:35.392149", "step": 4512, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.424045", "step": 4512, "epoch": 2 }, { "type": "loss", "content": 0.0010005880612879992, "timestamp": "2025-09-10 02:30:35.425842", "step": 4513, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.454496", "step": 4513, "epoch": 2 }, { "type": "loss", "content": 0.009261549450457096, "timestamp": "2025-09-10 02:30:35.456502", "step": 4514, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.485348", "step": 4514, "epoch": 2 }, { "type": "loss", "content": 0.0003881935845129192, "timestamp": "2025-09-10 02:30:35.488300", "step": 4515, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:35.517534", "step": 4515, "epoch": 2 }, { "type": "loss", "content": 0.0477876141667366, "timestamp": "2025-09-10 02:30:35.540842", "step": 4516, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.569696", "step": 4516, "epoch": 2 }, { "type": "loss", "content": 0.0003711617609951645, "timestamp": "2025-09-10 02:30:35.571793", "step": 4517, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.600905", "step": 4517, "epoch": 2 }, { "type": "loss", "content": 0.0011772002326324582, "timestamp": "2025-09-10 02:30:35.602901", "step": 4518, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.631702", "step": 4518, "epoch": 2 }, { "type": "loss", "content": 0.0020203415770083666, "timestamp": "2025-09-10 02:30:35.633678", "step": 4519, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.662660", "step": 4519, "epoch": 2 }, { "type": "loss", "content": 0.0011317833559587598, "timestamp": "2025-09-10 02:30:35.686088", "step": 4520, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:35.715203", "step": 4520, "epoch": 2 }, { "type": "loss", "content": 0.0004131353634875268, "timestamp": "2025-09-10 02:30:35.717090", "step": 4521, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.746432", "step": 4521, "epoch": 2 }, { "type": "loss", "content": 0.005883991252630949, "timestamp": "2025-09-10 02:30:35.748380", "step": 4522, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.777539", "step": 4522, "epoch": 2 }, { "type": "loss", "content": 0.005447355564683676, "timestamp": "2025-09-10 02:30:35.779478", "step": 4523, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:35.808483", "step": 4523, "epoch": 2 }, { "type": "loss", "content": 0.0002769411075860262, "timestamp": "2025-09-10 02:30:35.831945", "step": 4524, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:35.860608", "step": 4524, "epoch": 2 }, { "type": "loss", "content": 0.03765971213579178, "timestamp": "2025-09-10 02:30:35.862600", "step": 4525, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.891611", "step": 4525, "epoch": 2 }, { "type": "loss", "content": 0.003222037572413683, "timestamp": "2025-09-10 02:30:35.893552", "step": 4526, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.922583", "step": 4526, "epoch": 2 }, { "type": "loss", "content": 0.0010269946651533246, "timestamp": "2025-09-10 02:30:35.924455", "step": 4527, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:35.953620", "step": 4527, "epoch": 2 }, { "type": "loss", "content": 0.0013073551235720515, "timestamp": "2025-09-10 02:30:35.976876", "step": 4528, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:36.005621", "step": 4528, "epoch": 2 }, { "type": "loss", "content": 0.003010762622579932, "timestamp": "2025-09-10 02:30:36.007492", "step": 4529, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:36.036212", "step": 4529, "epoch": 2 }, { "type": "loss", "content": 0.0005810009897686541, "timestamp": "2025-09-10 02:30:36.038268", "step": 4530, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.067224", "step": 4530, "epoch": 2 }, { "type": "loss", "content": 0.023207888007164, "timestamp": "2025-09-10 02:30:36.069042", "step": 4531, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.097839", "step": 4531, "epoch": 2 }, { "type": "loss", "content": 0.0025188697036355734, "timestamp": "2025-09-10 02:30:36.121398", "step": 4532, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:36.150619", "step": 4532, "epoch": 2 }, { "type": "loss", "content": 0.00013451059930957854, "timestamp": "2025-09-10 02:30:36.152661", "step": 4533, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.181801", "step": 4533, "epoch": 2 }, { "type": "loss", "content": 0.0369986928999424, "timestamp": "2025-09-10 02:30:36.183835", "step": 4534, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.213081", "step": 4534, "epoch": 2 }, { "type": "loss", "content": 0.0017421423690393567, "timestamp": "2025-09-10 02:30:36.215043", "step": 4535, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.244310", "step": 4535, "epoch": 2 }, { "type": "loss", "content": 0.001130439923144877, "timestamp": "2025-09-10 02:30:36.267772", "step": 4536, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.297053", "step": 4536, "epoch": 2 }, { "type": "loss", "content": 0.005186374299228191, "timestamp": "2025-09-10 02:30:36.298900", "step": 4537, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.327623", "step": 4537, "epoch": 2 }, { "type": "loss", "content": 0.0008350748685188591, "timestamp": "2025-09-10 02:30:36.329455", "step": 4538, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:36.358526", "step": 4538, "epoch": 2 }, { "type": "loss", "content": 0.0476171113550663, "timestamp": "2025-09-10 02:30:36.360330", "step": 4539, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:36.389620", "step": 4539, "epoch": 2 }, { "type": "loss", "content": 0.00242114020511508, "timestamp": "2025-09-10 02:30:36.412863", "step": 4540, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.441691", "step": 4540, "epoch": 2 }, { "type": "loss", "content": 0.0016092261066660285, "timestamp": "2025-09-10 02:30:36.443888", "step": 4541, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.475386", "step": 4541, "epoch": 2 }, { "type": "loss", "content": 0.0004083625681232661, "timestamp": "2025-09-10 02:30:36.477282", "step": 4542, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.506802", "step": 4542, "epoch": 2 }, { "type": "loss", "content": 0.00012146379594923928, "timestamp": "2025-09-10 02:30:36.508782", "step": 4543, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:36.537619", "step": 4543, "epoch": 2 }, { "type": "loss", "content": 0.0010169557062909007, "timestamp": "2025-09-10 02:30:36.561091", "step": 4544, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.590239", "step": 4544, "epoch": 2 }, { "type": "loss", "content": 0.004768866579979658, "timestamp": "2025-09-10 02:30:36.592197", "step": 4545, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.621579", "step": 4545, "epoch": 2 }, { "type": "loss", "content": 0.04669610410928726, "timestamp": "2025-09-10 02:30:36.623469", "step": 4546, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.652562", "step": 4546, "epoch": 2 }, { "type": "loss", "content": 0.00036743137752637267, "timestamp": "2025-09-10 02:30:36.654545", "step": 4547, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.683313", "step": 4547, "epoch": 2 }, { "type": "loss", "content": 0.014560094103217125, "timestamp": "2025-09-10 02:30:36.706857", "step": 4548, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:36.735591", "step": 4548, "epoch": 2 }, { "type": "loss", "content": 0.0003129991819150746, "timestamp": "2025-09-10 02:30:36.737521", "step": 4549, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.766446", "step": 4549, "epoch": 2 }, { "type": "loss", "content": 0.0013748781057074666, "timestamp": "2025-09-10 02:30:36.768240", "step": 4550, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.797325", "step": 4550, "epoch": 2 }, { "type": "loss", "content": 0.004956469871103764, "timestamp": "2025-09-10 02:30:36.799382", "step": 4551, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.828415", "step": 4551, "epoch": 2 }, { "type": "loss", "content": 0.036850493401288986, "timestamp": "2025-09-10 02:30:36.851979", "step": 4552, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.881034", "step": 4552, "epoch": 2 }, { "type": "loss", "content": 0.00027258231421001256, "timestamp": "2025-09-10 02:30:36.882853", "step": 4553, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:36.911579", "step": 4553, "epoch": 2 }, { "type": "loss", "content": 0.00446205073967576, "timestamp": "2025-09-10 02:30:36.913659", "step": 4554, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.942456", "step": 4554, "epoch": 2 }, { "type": "loss", "content": 0.004611088894307613, "timestamp": "2025-09-10 02:30:36.944398", "step": 4555, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:36.973478", "step": 4555, "epoch": 2 }, { "type": "loss", "content": 0.029512211680412292, "timestamp": "2025-09-10 02:30:36.996744", "step": 4556, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:37.025574", "step": 4556, "epoch": 2 }, { "type": "loss", "content": 0.007085477467626333, "timestamp": "2025-09-10 02:30:37.027557", "step": 4557, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:37.056316", "step": 4557, "epoch": 2 }, { "type": "loss", "content": 0.00022227386943995953, "timestamp": "2025-09-10 02:30:37.058258", "step": 4558, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:37.087116", "step": 4558, "epoch": 2 }, { "type": "loss", "content": 0.02596711739897728, "timestamp": "2025-09-10 02:30:37.089037", "step": 4559, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:37.117462", "step": 4559, "epoch": 2 }, { "type": "loss", "content": 0.0008985972381196916, "timestamp": "2025-09-10 02:30:37.140940", "step": 4560, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:30:39.092939", "step": 4560, "epoch": 2 }, { "type": "pplx", "content": 2204857.4238991113, "timestamp": "2025-09-10 02:30:39.094819", "step": 4560, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.122636", "step": 4560, "epoch": 2 }, { "type": "loss", "content": 0.0112817557528615, "timestamp": "2025-09-10 02:30:39.124427", "step": 4561, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.160945", "step": 4561, "epoch": 2 }, { "type": "loss", "content": 0.00012598246394190937, "timestamp": "2025-09-10 02:30:39.169426", "step": 4562, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.203089", "step": 4562, "epoch": 2 }, { "type": "loss", "content": 0.00012869889906141907, "timestamp": "2025-09-10 02:30:39.205047", "step": 4563, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.235693", "step": 4563, "epoch": 2 }, { "type": "loss", "content": 0.00023604150919709355, "timestamp": "2025-09-10 02:30:39.259181", "step": 4564, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:39.288542", "step": 4564, "epoch": 2 }, { "type": "loss", "content": 0.034922044724226, "timestamp": "2025-09-10 02:30:39.290428", "step": 4565, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.322610", "step": 4565, "epoch": 2 }, { "type": "loss", "content": 0.0004843877977691591, "timestamp": "2025-09-10 02:30:39.324425", "step": 4566, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.353012", "step": 4566, "epoch": 2 }, { "type": "loss", "content": 0.0006594950682483613, "timestamp": "2025-09-10 02:30:39.354933", "step": 4567, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:39.384341", "step": 4567, "epoch": 2 }, { "type": "loss", "content": 0.00014926897711120546, "timestamp": "2025-09-10 02:30:39.407748", "step": 4568, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.436628", "step": 4568, "epoch": 2 }, { "type": "loss", "content": 0.01768476329743862, "timestamp": "2025-09-10 02:30:39.438450", "step": 4569, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:39.468403", "step": 4569, "epoch": 2 }, { "type": "loss", "content": 0.0005520475679077208, "timestamp": "2025-09-10 02:30:39.470545", "step": 4570, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.500803", "step": 4570, "epoch": 2 }, { "type": "loss", "content": 0.005577580071985722, "timestamp": "2025-09-10 02:30:39.503562", "step": 4571, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:39.532390", "step": 4571, "epoch": 2 }, { "type": "loss", "content": 0.00513619976118207, "timestamp": "2025-09-10 02:30:39.555672", "step": 4572, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.585529", "step": 4572, "epoch": 2 }, { "type": "loss", "content": 0.00020523127750493586, "timestamp": "2025-09-10 02:30:39.589882", "step": 4573, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.619568", "step": 4573, "epoch": 2 }, { "type": "loss", "content": 0.00042392921750433743, "timestamp": "2025-09-10 02:30:39.621407", "step": 4574, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.658937", "step": 4574, "epoch": 2 }, { "type": "loss", "content": 0.007960820570588112, "timestamp": "2025-09-10 02:30:39.661977", "step": 4575, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.691137", "step": 4575, "epoch": 2 }, { "type": "loss", "content": 0.009319993667304516, "timestamp": "2025-09-10 02:30:39.714631", "step": 4576, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.743675", "step": 4576, "epoch": 2 }, { "type": "loss", "content": 0.00033753132447600365, "timestamp": "2025-09-10 02:30:39.745492", "step": 4577, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.775294", "step": 4577, "epoch": 2 }, { "type": "loss", "content": 0.002440785523504019, "timestamp": "2025-09-10 02:30:39.777906", "step": 4578, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:39.807977", "step": 4578, "epoch": 2 }, { "type": "loss", "content": 0.001942989183589816, "timestamp": "2025-09-10 02:30:39.809938", "step": 4579, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.839331", "step": 4579, "epoch": 2 }, { "type": "loss", "content": 0.0011816952610388398, "timestamp": "2025-09-10 02:30:39.862656", "step": 4580, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.892184", "step": 4580, "epoch": 2 }, { "type": "loss", "content": 0.0007158272783271968, "timestamp": "2025-09-10 02:30:39.893926", "step": 4581, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.923074", "step": 4581, "epoch": 2 }, { "type": "loss", "content": 0.0004863930225837976, "timestamp": "2025-09-10 02:30:39.924984", "step": 4582, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.954078", "step": 4582, "epoch": 2 }, { "type": "loss", "content": 0.007595433853566647, "timestamp": "2025-09-10 02:30:39.955951", "step": 4583, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:39.984577", "step": 4583, "epoch": 2 }, { "type": "loss", "content": 0.009131629951298237, "timestamp": "2025-09-10 02:30:40.008371", "step": 4584, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.037480", "step": 4584, "epoch": 2 }, { "type": "loss", "content": 0.009902708232402802, "timestamp": "2025-09-10 02:30:40.039426", "step": 4585, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.068420", "step": 4585, "epoch": 2 }, { "type": "loss", "content": 0.004625441040843725, "timestamp": "2025-09-10 02:30:40.070286", "step": 4586, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:40.099322", "step": 4586, "epoch": 2 }, { "type": "loss", "content": 0.022473273798823357, "timestamp": "2025-09-10 02:30:40.101943", "step": 4587, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.130627", "step": 4587, "epoch": 2 }, { "type": "loss", "content": 0.0013099823845550418, "timestamp": "2025-09-10 02:30:40.154071", "step": 4588, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.183884", "step": 4588, "epoch": 2 }, { "type": "loss", "content": 0.0021664605010300875, "timestamp": "2025-09-10 02:30:40.185604", "step": 4589, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.214050", "step": 4589, "epoch": 2 }, { "type": "loss", "content": 0.011343925260007381, "timestamp": "2025-09-10 02:30:40.216500", "step": 4590, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.245623", "step": 4590, "epoch": 2 }, { "type": "loss", "content": 0.0015838586259633303, "timestamp": "2025-09-10 02:30:40.247464", "step": 4591, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.276027", "step": 4591, "epoch": 2 }, { "type": "loss", "content": 0.02695656009018421, "timestamp": "2025-09-10 02:30:40.299275", "step": 4592, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.331332", "step": 4592, "epoch": 2 }, { "type": "loss", "content": 0.00487537682056427, "timestamp": "2025-09-10 02:30:40.333187", "step": 4593, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.361985", "step": 4593, "epoch": 2 }, { "type": "loss", "content": 0.0014115578960627317, "timestamp": "2025-09-10 02:30:40.363985", "step": 4594, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.395100", "step": 4594, "epoch": 2 }, { "type": "loss", "content": 0.015568017028272152, "timestamp": "2025-09-10 02:30:40.397595", "step": 4595, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.426632", "step": 4595, "epoch": 2 }, { "type": "loss", "content": 0.00027882744325324893, "timestamp": "2025-09-10 02:30:40.450028", "step": 4596, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.478916", "step": 4596, "epoch": 2 }, { "type": "loss", "content": 0.0005282434285618365, "timestamp": "2025-09-10 02:30:40.480836", "step": 4597, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.509540", "step": 4597, "epoch": 2 }, { "type": "loss", "content": 0.00030529152718372643, "timestamp": "2025-09-10 02:30:40.512780", "step": 4598, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.557635", "step": 4598, "epoch": 2 }, { "type": "loss", "content": 0.015784697607159615, "timestamp": "2025-09-10 02:30:40.559781", "step": 4599, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.593319", "step": 4599, "epoch": 2 }, { "type": "loss", "content": 7.095612090779468e-05, "timestamp": "2025-09-10 02:30:40.617932", "step": 4600, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.649154", "step": 4600, "epoch": 2 }, { "type": "loss", "content": 0.01217495184391737, "timestamp": "2025-09-10 02:30:40.651516", "step": 4601, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.679953", "step": 4601, "epoch": 2 }, { "type": "loss", "content": 0.0007999838562682271, "timestamp": "2025-09-10 02:30:40.682424", "step": 4602, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.711149", "step": 4602, "epoch": 2 }, { "type": "loss", "content": 0.006470394786447287, "timestamp": "2025-09-10 02:30:40.720660", "step": 4603, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.755854", "step": 4603, "epoch": 2 }, { "type": "loss", "content": 0.001985550858080387, "timestamp": "2025-09-10 02:30:40.779835", "step": 4604, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.808505", "step": 4604, "epoch": 2 }, { "type": "loss", "content": 0.004694047849625349, "timestamp": "2025-09-10 02:30:40.811014", "step": 4605, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.840819", "step": 4605, "epoch": 2 }, { "type": "loss", "content": 0.0005729757831431925, "timestamp": "2025-09-10 02:30:40.842966", "step": 4606, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.871370", "step": 4606, "epoch": 2 }, { "type": "loss", "content": 0.00021095320698805153, "timestamp": "2025-09-10 02:30:40.873318", "step": 4607, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.902635", "step": 4607, "epoch": 2 }, { "type": "loss", "content": 0.006257088389247656, "timestamp": "2025-09-10 02:30:40.926884", "step": 4608, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:40.957850", "step": 4608, "epoch": 2 }, { "type": "loss", "content": 0.0003300842654425651, "timestamp": "2025-09-10 02:30:40.960007", "step": 4609, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:40.992161", "step": 4609, "epoch": 2 }, { "type": "loss", "content": 0.0005720595945604146, "timestamp": "2025-09-10 02:30:40.997406", "step": 4610, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.026477", "step": 4610, "epoch": 2 }, { "type": "loss", "content": 0.012436196208000183, "timestamp": "2025-09-10 02:30:41.028400", "step": 4611, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.057607", "step": 4611, "epoch": 2 }, { "type": "loss", "content": 0.00013509248674381524, "timestamp": "2025-09-10 02:30:41.080877", "step": 4612, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:41.109707", "step": 4612, "epoch": 2 }, { "type": "loss", "content": 0.0005333507433533669, "timestamp": "2025-09-10 02:30:41.111523", "step": 4613, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.140906", "step": 4613, "epoch": 2 }, { "type": "loss", "content": 0.00045343072270043194, "timestamp": "2025-09-10 02:30:41.142735", "step": 4614, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.171626", "step": 4614, "epoch": 2 }, { "type": "loss", "content": 0.0035763964988291264, "timestamp": "2025-09-10 02:30:41.173663", "step": 4615, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.203325", "step": 4615, "epoch": 2 }, { "type": "loss", "content": 0.0002577145060058683, "timestamp": "2025-09-10 02:30:41.226733", "step": 4616, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.258264", "step": 4616, "epoch": 2 }, { "type": "loss", "content": 0.0007295700488612056, "timestamp": "2025-09-10 02:30:41.260287", "step": 4617, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.290409", "step": 4617, "epoch": 2 }, { "type": "loss", "content": 0.11661987006664276, "timestamp": "2025-09-10 02:30:41.294082", "step": 4618, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:41.332683", "step": 4618, "epoch": 2 }, { "type": "loss", "content": 0.0005766888498328626, "timestamp": "2025-09-10 02:30:41.336365", "step": 4619, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.371552", "step": 4619, "epoch": 2 }, { "type": "loss", "content": 0.0007064057281240821, "timestamp": "2025-09-10 02:30:41.395439", "step": 4620, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.425081", "step": 4620, "epoch": 2 }, { "type": "loss", "content": 0.00012147352390456945, "timestamp": "2025-09-10 02:30:41.427107", "step": 4621, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.455923", "step": 4621, "epoch": 2 }, { "type": "loss", "content": 0.001252091838978231, "timestamp": "2025-09-10 02:30:41.458463", "step": 4622, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:41.487409", "step": 4622, "epoch": 2 }, { "type": "loss", "content": 0.005930963438004255, "timestamp": "2025-09-10 02:30:41.489506", "step": 4623, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.519499", "step": 4623, "epoch": 2 }, { "type": "loss", "content": 0.0003072503604926169, "timestamp": "2025-09-10 02:30:41.543542", "step": 4624, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.573982", "step": 4624, "epoch": 2 }, { "type": "loss", "content": 0.0008662088657729328, "timestamp": "2025-09-10 02:30:41.575985", "step": 4625, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.604687", "step": 4625, "epoch": 2 }, { "type": "loss", "content": 0.00038694750401191413, "timestamp": "2025-09-10 02:30:41.606784", "step": 4626, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.636491", "step": 4626, "epoch": 2 }, { "type": "loss", "content": 0.00027813235647045076, "timestamp": "2025-09-10 02:30:41.638314", "step": 4627, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.668856", "step": 4627, "epoch": 2 }, { "type": "loss", "content": 0.005301487632095814, "timestamp": "2025-09-10 02:30:41.692623", "step": 4628, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.726012", "step": 4628, "epoch": 2 }, { "type": "loss", "content": 0.0001333223917754367, "timestamp": "2025-09-10 02:30:41.728121", "step": 4629, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.757633", "step": 4629, "epoch": 2 }, { "type": "loss", "content": 0.027996379882097244, "timestamp": "2025-09-10 02:30:41.759479", "step": 4630, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.788366", "step": 4630, "epoch": 2 }, { "type": "loss", "content": 0.020284932106733322, "timestamp": "2025-09-10 02:30:41.790453", "step": 4631, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:41.823358", "step": 4631, "epoch": 2 }, { "type": "loss", "content": 0.0066495900973677635, "timestamp": "2025-09-10 02:30:41.847313", "step": 4632, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:41.881336", "step": 4632, "epoch": 2 }, { "type": "loss", "content": 0.0035037498455494642, "timestamp": "2025-09-10 02:30:41.884140", "step": 4633, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:41.913636", "step": 4633, "epoch": 2 }, { "type": "loss", "content": 8.686402725288644e-05, "timestamp": "2025-09-10 02:30:41.920886", "step": 4634, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.954650", "step": 4634, "epoch": 2 }, { "type": "loss", "content": 0.00020267089712433517, "timestamp": "2025-09-10 02:30:41.956835", "step": 4635, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:41.989209", "step": 4635, "epoch": 2 }, { "type": "loss", "content": 0.00013673164357896894, "timestamp": "2025-09-10 02:30:42.012563", "step": 4636, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.044167", "step": 4636, "epoch": 2 }, { "type": "loss", "content": 0.0030601483304053545, "timestamp": "2025-09-10 02:30:42.045973", "step": 4637, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.075195", "step": 4637, "epoch": 2 }, { "type": "loss", "content": 0.0019239679677411914, "timestamp": "2025-09-10 02:30:42.077883", "step": 4638, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.106617", "step": 4638, "epoch": 2 }, { "type": "loss", "content": 0.017276791855692863, "timestamp": "2025-09-10 02:30:42.108523", "step": 4639, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:42.138563", "step": 4639, "epoch": 2 }, { "type": "loss", "content": 0.027837634086608887, "timestamp": "2025-09-10 02:30:42.173814", "step": 4640, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.203399", "step": 4640, "epoch": 2 }, { "type": "loss", "content": 0.02407458983361721, "timestamp": "2025-09-10 02:30:42.206145", "step": 4641, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.241768", "step": 4641, "epoch": 2 }, { "type": "loss", "content": 0.03673195466399193, "timestamp": "2025-09-10 02:30:42.245392", "step": 4642, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.275472", "step": 4642, "epoch": 2 }, { "type": "loss", "content": 0.003787655383348465, "timestamp": "2025-09-10 02:30:42.277433", "step": 4643, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:42.306500", "step": 4643, "epoch": 2 }, { "type": "loss", "content": 0.0010021141497418284, "timestamp": "2025-09-10 02:30:42.329933", "step": 4644, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:42.361217", "step": 4644, "epoch": 2 }, { "type": "loss", "content": 0.02015187032520771, "timestamp": "2025-09-10 02:30:42.365352", "step": 4645, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.395877", "step": 4645, "epoch": 2 }, { "type": "loss", "content": 0.04627053812146187, "timestamp": "2025-09-10 02:30:42.397899", "step": 4646, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.426539", "step": 4646, "epoch": 2 }, { "type": "loss", "content": 0.013461175374686718, "timestamp": "2025-09-10 02:30:42.428357", "step": 4647, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.456911", "step": 4647, "epoch": 2 }, { "type": "loss", "content": 0.0011725391959771514, "timestamp": "2025-09-10 02:30:42.480557", "step": 4648, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.512694", "step": 4648, "epoch": 2 }, { "type": "loss", "content": 0.02038337104022503, "timestamp": "2025-09-10 02:30:42.514237", "step": 4649, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.545868", "step": 4649, "epoch": 2 }, { "type": "loss", "content": 0.0018510989611968398, "timestamp": "2025-09-10 02:30:42.547783", "step": 4650, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.579572", "step": 4650, "epoch": 2 }, { "type": "loss", "content": 0.0019442373886704445, "timestamp": "2025-09-10 02:30:42.581364", "step": 4651, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.610080", "step": 4651, "epoch": 2 }, { "type": "loss", "content": 0.0031817667186260223, "timestamp": "2025-09-10 02:30:42.633346", "step": 4652, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:42.663207", "step": 4652, "epoch": 2 }, { "type": "loss", "content": 0.00040753273060545325, "timestamp": "2025-09-10 02:30:42.665012", "step": 4653, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.693702", "step": 4653, "epoch": 2 }, { "type": "loss", "content": 0.0002709409745875746, "timestamp": "2025-09-10 02:30:42.695937", "step": 4654, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:42.725483", "step": 4654, "epoch": 2 }, { "type": "loss", "content": 0.0008405909757129848, "timestamp": "2025-09-10 02:30:42.727579", "step": 4655, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:42.756437", "step": 4655, "epoch": 2 }, { "type": "loss", "content": 0.000137664086651057, "timestamp": "2025-09-10 02:30:42.781404", "step": 4656, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.810949", "step": 4656, "epoch": 2 }, { "type": "loss", "content": 0.008599597029387951, "timestamp": "2025-09-10 02:30:42.813001", "step": 4657, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.842445", "step": 4657, "epoch": 2 }, { "type": "loss", "content": 0.0001108621945604682, "timestamp": "2025-09-10 02:30:42.844564", "step": 4658, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.876877", "step": 4658, "epoch": 2 }, { "type": "loss", "content": 0.0005707357777282596, "timestamp": "2025-09-10 02:30:42.879001", "step": 4659, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.915123", "step": 4659, "epoch": 2 }, { "type": "loss", "content": 0.0015837346436455846, "timestamp": "2025-09-10 02:30:42.939759", "step": 4660, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:42.969442", "step": 4660, "epoch": 2 }, { "type": "loss", "content": 0.0008016590145416558, "timestamp": "2025-09-10 02:30:42.972439", "step": 4661, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.010638", "step": 4661, "epoch": 2 }, { "type": "loss", "content": 0.02274659276008606, "timestamp": "2025-09-10 02:30:43.012657", "step": 4662, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.041706", "step": 4662, "epoch": 2 }, { "type": "loss", "content": 0.037858959287405014, "timestamp": "2025-09-10 02:30:43.048490", "step": 4663, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.079621", "step": 4663, "epoch": 2 }, { "type": "loss", "content": 0.04054301232099533, "timestamp": "2025-09-10 02:30:43.105035", "step": 4664, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.134391", "step": 4664, "epoch": 2 }, { "type": "loss", "content": 0.0031110390555113554, "timestamp": "2025-09-10 02:30:43.136919", "step": 4665, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.167371", "step": 4665, "epoch": 2 }, { "type": "loss", "content": 0.0007724833558313549, "timestamp": "2025-09-10 02:30:43.173496", "step": 4666, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.209853", "step": 4666, "epoch": 2 }, { "type": "loss", "content": 0.002394846873357892, "timestamp": "2025-09-10 02:30:43.216483", "step": 4667, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.249223", "step": 4667, "epoch": 2 }, { "type": "loss", "content": 0.00028845228371210396, "timestamp": "2025-09-10 02:30:43.273554", "step": 4668, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.302254", "step": 4668, "epoch": 2 }, { "type": "loss", "content": 0.04577751085162163, "timestamp": "2025-09-10 02:30:43.306447", "step": 4669, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.337230", "step": 4669, "epoch": 2 }, { "type": "loss", "content": 0.02678990364074707, "timestamp": "2025-09-10 02:30:43.339109", "step": 4670, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.367863", "step": 4670, "epoch": 2 }, { "type": "loss", "content": 0.011855545453727245, "timestamp": "2025-09-10 02:30:43.370035", "step": 4671, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.398982", "step": 4671, "epoch": 2 }, { "type": "loss", "content": 0.04165295884013176, "timestamp": "2025-09-10 02:30:43.422536", "step": 4672, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.451636", "step": 4672, "epoch": 2 }, { "type": "loss", "content": 0.017214450985193253, "timestamp": "2025-09-10 02:30:43.453514", "step": 4673, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.487264", "step": 4673, "epoch": 2 }, { "type": "loss", "content": 0.009694449603557587, "timestamp": "2025-09-10 02:30:43.491378", "step": 4674, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.522045", "step": 4674, "epoch": 2 }, { "type": "loss", "content": 0.006493962835520506, "timestamp": "2025-09-10 02:30:43.525062", "step": 4675, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.557196", "step": 4675, "epoch": 2 }, { "type": "loss", "content": 0.0008258892921730876, "timestamp": "2025-09-10 02:30:43.580796", "step": 4676, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.609692", "step": 4676, "epoch": 2 }, { "type": "loss", "content": 0.016906818374991417, "timestamp": "2025-09-10 02:30:43.611806", "step": 4677, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.640809", "step": 4677, "epoch": 2 }, { "type": "loss", "content": 0.0019898030441254377, "timestamp": "2025-09-10 02:30:43.643271", "step": 4678, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.673630", "step": 4678, "epoch": 2 }, { "type": "loss", "content": 0.0012549569364637136, "timestamp": "2025-09-10 02:30:43.675650", "step": 4679, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.704425", "step": 4679, "epoch": 2 }, { "type": "loss", "content": 0.003011396387591958, "timestamp": "2025-09-10 02:30:43.733089", "step": 4680, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.763147", "step": 4680, "epoch": 2 }, { "type": "loss", "content": 0.0006827022880315781, "timestamp": "2025-09-10 02:30:43.765001", "step": 4681, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.793751", "step": 4681, "epoch": 2 }, { "type": "loss", "content": 0.019386066123843193, "timestamp": "2025-09-10 02:30:43.796124", "step": 4682, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.824919", "step": 4682, "epoch": 2 }, { "type": "loss", "content": 0.07785572856664658, "timestamp": "2025-09-10 02:30:43.826963", "step": 4683, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.855483", "step": 4683, "epoch": 2 }, { "type": "loss", "content": 0.003492309246212244, "timestamp": "2025-09-10 02:30:43.879161", "step": 4684, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.909882", "step": 4684, "epoch": 2 }, { "type": "loss", "content": 0.005199705250561237, "timestamp": "2025-09-10 02:30:43.911875", "step": 4685, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:43.943726", "step": 4685, "epoch": 2 }, { "type": "loss", "content": 0.0023987186141312122, "timestamp": "2025-09-10 02:30:43.945783", "step": 4686, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:43.974628", "step": 4686, "epoch": 2 }, { "type": "loss", "content": 0.014172668568789959, "timestamp": "2025-09-10 02:30:43.977686", "step": 4687, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:44.014211", "step": 4687, "epoch": 2 }, { "type": "loss", "content": 0.003468379145488143, "timestamp": "2025-09-10 02:30:44.037738", "step": 4688, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.066953", "step": 4688, "epoch": 2 }, { "type": "loss", "content": 0.039519794285297394, "timestamp": "2025-09-10 02:30:44.069521", "step": 4689, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.099205", "step": 4689, "epoch": 2 }, { "type": "loss", "content": 0.029918795451521873, "timestamp": "2025-09-10 02:30:44.102615", "step": 4690, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.131355", "step": 4690, "epoch": 2 }, { "type": "loss", "content": 0.0024169038515537977, "timestamp": "2025-09-10 02:30:44.133254", "step": 4691, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:44.161931", "step": 4691, "epoch": 2 }, { "type": "loss", "content": 0.025109851732850075, "timestamp": "2025-09-10 02:30:44.189382", "step": 4692, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:44.221348", "step": 4692, "epoch": 2 }, { "type": "loss", "content": 0.005677707493305206, "timestamp": "2025-09-10 02:30:44.225395", "step": 4693, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.255279", "step": 4693, "epoch": 2 }, { "type": "loss", "content": 0.027732862159609795, "timestamp": "2025-09-10 02:30:44.257197", "step": 4694, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:44.285701", "step": 4694, "epoch": 2 }, { "type": "loss", "content": 0.005289082881063223, "timestamp": "2025-09-10 02:30:44.288032", "step": 4695, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:44.316635", "step": 4695, "epoch": 2 }, { "type": "loss", "content": 0.006506029516458511, "timestamp": "2025-09-10 02:30:44.340230", "step": 4696, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.373189", "step": 4696, "epoch": 2 }, { "type": "loss", "content": 0.0031115796882659197, "timestamp": "2025-09-10 02:30:44.375106", "step": 4697, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.404596", "step": 4697, "epoch": 2 }, { "type": "loss", "content": 0.0010988533031195402, "timestamp": "2025-09-10 02:30:44.406836", "step": 4698, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.436008", "step": 4698, "epoch": 2 }, { "type": "loss", "content": 0.01683671586215496, "timestamp": "2025-09-10 02:30:44.437915", "step": 4699, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:44.467699", "step": 4699, "epoch": 2 }, { "type": "loss", "content": 0.02131526544690132, "timestamp": "2025-09-10 02:30:44.491430", "step": 4700, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:44.522417", "step": 4700, "epoch": 2 }, { "type": "loss", "content": 0.008895104750990868, "timestamp": "2025-09-10 02:30:44.532048", "step": 4701, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.564307", "step": 4701, "epoch": 2 }, { "type": "loss", "content": 0.03631191700696945, "timestamp": "2025-09-10 02:30:44.566308", "step": 4702, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:44.595457", "step": 4702, "epoch": 2 }, { "type": "loss", "content": 0.023046817630529404, "timestamp": "2025-09-10 02:30:44.599918", "step": 4703, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.629210", "step": 4703, "epoch": 2 }, { "type": "loss", "content": 0.002487179124727845, "timestamp": "2025-09-10 02:30:44.653036", "step": 4704, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.682241", "step": 4704, "epoch": 2 }, { "type": "loss", "content": 0.008304216898977757, "timestamp": "2025-09-10 02:30:44.684165", "step": 4705, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.712626", "step": 4705, "epoch": 2 }, { "type": "loss", "content": 0.03096388466656208, "timestamp": "2025-09-10 02:30:44.714530", "step": 4706, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.743155", "step": 4706, "epoch": 2 }, { "type": "loss", "content": 0.00456048222258687, "timestamp": "2025-09-10 02:30:44.745119", "step": 4707, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.774873", "step": 4707, "epoch": 2 }, { "type": "loss", "content": 0.022228265181183815, "timestamp": "2025-09-10 02:30:44.798344", "step": 4708, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.827008", "step": 4708, "epoch": 2 }, { "type": "loss", "content": 0.005984528921544552, "timestamp": "2025-09-10 02:30:44.828914", "step": 4709, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:44.863219", "step": 4709, "epoch": 2 }, { "type": "loss", "content": 0.0011961181880906224, "timestamp": "2025-09-10 02:30:44.865556", "step": 4710, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:44.895578", "step": 4710, "epoch": 2 }, { "type": "loss", "content": 0.013456185348331928, "timestamp": "2025-09-10 02:30:44.899777", "step": 4711, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:44.932157", "step": 4711, "epoch": 2 }, { "type": "loss", "content": 0.0036757667548954487, "timestamp": "2025-09-10 02:30:44.956777", "step": 4712, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:30:46.943632", "step": 4712, "epoch": 2 }, { "type": "pplx", "content": 2189411.935164041, "timestamp": "2025-09-10 02:30:46.950644", "step": 4712, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:46.983917", "step": 4712, "epoch": 2 }, { "type": "loss", "content": 0.012935475446283817, "timestamp": "2025-09-10 02:30:46.990285", "step": 4713, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.022140", "step": 4713, "epoch": 2 }, { "type": "loss", "content": 0.028412139043211937, "timestamp": "2025-09-10 02:30:47.024790", "step": 4714, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.057810", "step": 4714, "epoch": 2 }, { "type": "loss", "content": 0.00245910813100636, "timestamp": "2025-09-10 02:30:47.059944", "step": 4715, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.093278", "step": 4715, "epoch": 2 }, { "type": "loss", "content": 0.0012804417638108134, "timestamp": "2025-09-10 02:30:47.116909", "step": 4716, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.146954", "step": 4716, "epoch": 2 }, { "type": "loss", "content": 0.014916189014911652, "timestamp": "2025-09-10 02:30:47.149810", "step": 4717, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.179596", "step": 4717, "epoch": 2 }, { "type": "loss", "content": 0.027777446433901787, "timestamp": "2025-09-10 02:30:47.181586", "step": 4718, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.212111", "step": 4718, "epoch": 2 }, { "type": "loss", "content": 0.0018172768177464604, "timestamp": "2025-09-10 02:30:47.217945", "step": 4719, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.249552", "step": 4719, "epoch": 2 }, { "type": "loss", "content": 0.03047746792435646, "timestamp": "2025-09-10 02:30:47.273245", "step": 4720, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.307706", "step": 4720, "epoch": 2 }, { "type": "loss", "content": 0.0038779808674007654, "timestamp": "2025-09-10 02:30:47.311321", "step": 4721, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.342423", "step": 4721, "epoch": 2 }, { "type": "loss", "content": 0.009764066897332668, "timestamp": "2025-09-10 02:30:47.344400", "step": 4722, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.375506", "step": 4722, "epoch": 2 }, { "type": "loss", "content": 0.005471152253448963, "timestamp": "2025-09-10 02:30:47.378413", "step": 4723, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.415050", "step": 4723, "epoch": 2 }, { "type": "loss", "content": 0.0015373850474134088, "timestamp": "2025-09-10 02:30:47.443267", "step": 4724, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:47.476622", "step": 4724, "epoch": 2 }, { "type": "loss", "content": 0.017080390825867653, "timestamp": "2025-09-10 02:30:47.481587", "step": 4725, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:47.513468", "step": 4725, "epoch": 2 }, { "type": "loss", "content": 0.10140477120876312, "timestamp": "2025-09-10 02:30:47.517208", "step": 4726, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.548532", "step": 4726, "epoch": 2 }, { "type": "loss", "content": 0.005187637638300657, "timestamp": "2025-09-10 02:30:47.550419", "step": 4727, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:47.583564", "step": 4727, "epoch": 2 }, { "type": "loss", "content": 0.003937442786991596, "timestamp": "2025-09-10 02:30:47.607657", "step": 4728, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.641128", "step": 4728, "epoch": 2 }, { "type": "loss", "content": 0.010151097550988197, "timestamp": "2025-09-10 02:30:47.643458", "step": 4729, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.672964", "step": 4729, "epoch": 2 }, { "type": "loss", "content": 0.013919848017394543, "timestamp": "2025-09-10 02:30:47.675010", "step": 4730, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.705200", "step": 4730, "epoch": 2 }, { "type": "loss", "content": 0.033086393028497696, "timestamp": "2025-09-10 02:30:47.707519", "step": 4731, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.760257", "step": 4731, "epoch": 2 }, { "type": "loss", "content": 0.006276473868638277, "timestamp": "2025-09-10 02:30:47.783966", "step": 4732, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.825242", "step": 4732, "epoch": 2 }, { "type": "loss", "content": 0.000802433758508414, "timestamp": "2025-09-10 02:30:47.827097", "step": 4733, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.856141", "step": 4733, "epoch": 2 }, { "type": "loss", "content": 0.0005969117628410459, "timestamp": "2025-09-10 02:30:47.858172", "step": 4734, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:47.888222", "step": 4734, "epoch": 2 }, { "type": "loss", "content": 0.013208402320742607, "timestamp": "2025-09-10 02:30:47.890404", "step": 4735, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.922676", "step": 4735, "epoch": 2 }, { "type": "loss", "content": 0.002340029925107956, "timestamp": "2025-09-10 02:30:47.946404", "step": 4736, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:47.975599", "step": 4736, "epoch": 2 }, { "type": "loss", "content": 0.010006987489759922, "timestamp": "2025-09-10 02:30:47.977595", "step": 4737, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.007076", "step": 4737, "epoch": 2 }, { "type": "loss", "content": 0.0012558095622807741, "timestamp": "2025-09-10 02:30:48.009670", "step": 4738, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.038706", "step": 4738, "epoch": 2 }, { "type": "loss", "content": 0.0228542722761631, "timestamp": "2025-09-10 02:30:48.041043", "step": 4739, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.070160", "step": 4739, "epoch": 2 }, { "type": "loss", "content": 0.018659209832549095, "timestamp": "2025-09-10 02:30:48.093650", "step": 4740, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.123026", "step": 4740, "epoch": 2 }, { "type": "loss", "content": 0.03863928094506264, "timestamp": "2025-09-10 02:30:48.125075", "step": 4741, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.162263", "step": 4741, "epoch": 2 }, { "type": "loss", "content": 0.0017381685320287943, "timestamp": "2025-09-10 02:30:48.164670", "step": 4742, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.204703", "step": 4742, "epoch": 2 }, { "type": "loss", "content": 0.0007383439806289971, "timestamp": "2025-09-10 02:30:48.206780", "step": 4743, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.235891", "step": 4743, "epoch": 2 }, { "type": "loss", "content": 0.004693225957453251, "timestamp": "2025-09-10 02:30:48.259720", "step": 4744, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.291951", "step": 4744, "epoch": 2 }, { "type": "loss", "content": 0.0011855376651510596, "timestamp": "2025-09-10 02:30:48.294037", "step": 4745, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.323530", "step": 4745, "epoch": 2 }, { "type": "loss", "content": 0.03477257490158081, "timestamp": "2025-09-10 02:30:48.325739", "step": 4746, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.356033", "step": 4746, "epoch": 2 }, { "type": "loss", "content": 0.020502671599388123, "timestamp": "2025-09-10 02:30:48.359530", "step": 4747, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.390703", "step": 4747, "epoch": 2 }, { "type": "loss", "content": 0.004528583027422428, "timestamp": "2025-09-10 02:30:48.414559", "step": 4748, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:48.445247", "step": 4748, "epoch": 2 }, { "type": "loss", "content": 0.028670435771346092, "timestamp": "2025-09-10 02:30:48.447524", "step": 4749, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.477658", "step": 4749, "epoch": 2 }, { "type": "loss", "content": 0.021358918398618698, "timestamp": "2025-09-10 02:30:48.480839", "step": 4750, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.513074", "step": 4750, "epoch": 2 }, { "type": "loss", "content": 0.0009635729365982115, "timestamp": "2025-09-10 02:30:48.515355", "step": 4751, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:48.545419", "step": 4751, "epoch": 2 }, { "type": "loss", "content": 0.013524622656404972, "timestamp": "2025-09-10 02:30:48.569661", "step": 4752, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.599023", "step": 4752, "epoch": 2 }, { "type": "loss", "content": 0.006929680239409208, "timestamp": "2025-09-10 02:30:48.600950", "step": 4753, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.630016", "step": 4753, "epoch": 2 }, { "type": "loss", "content": 0.00010071784345200285, "timestamp": "2025-09-10 02:30:48.632782", "step": 4754, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:48.664816", "step": 4754, "epoch": 2 }, { "type": "loss", "content": 0.0015575990546494722, "timestamp": "2025-09-10 02:30:48.667070", "step": 4755, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.697112", "step": 4755, "epoch": 2 }, { "type": "loss", "content": 0.0017869753064587712, "timestamp": "2025-09-10 02:30:48.720950", "step": 4756, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.750314", "step": 4756, "epoch": 2 }, { "type": "loss", "content": 0.0012821994023397565, "timestamp": "2025-09-10 02:30:48.752332", "step": 4757, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.781699", "step": 4757, "epoch": 2 }, { "type": "loss", "content": 0.0017570492345839739, "timestamp": "2025-09-10 02:30:48.783961", "step": 4758, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.813367", "step": 4758, "epoch": 2 }, { "type": "loss", "content": 0.023131389170885086, "timestamp": "2025-09-10 02:30:48.815350", "step": 4759, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.844640", "step": 4759, "epoch": 2 }, { "type": "loss", "content": 0.005593287758529186, "timestamp": "2025-09-10 02:30:48.868429", "step": 4760, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.897767", "step": 4760, "epoch": 2 }, { "type": "loss", "content": 0.00040549522964283824, "timestamp": "2025-09-10 02:30:48.899881", "step": 4761, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.935826", "step": 4761, "epoch": 2 }, { "type": "loss", "content": 0.026496440172195435, "timestamp": "2025-09-10 02:30:48.938266", "step": 4762, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:48.967707", "step": 4762, "epoch": 2 }, { "type": "loss", "content": 0.008946560323238373, "timestamp": "2025-09-10 02:30:48.972334", "step": 4763, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.009629", "step": 4763, "epoch": 2 }, { "type": "loss", "content": 0.0011749848490580916, "timestamp": "2025-09-10 02:30:49.033410", "step": 4764, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.062968", "step": 4764, "epoch": 2 }, { "type": "loss", "content": 0.0005551399663090706, "timestamp": "2025-09-10 02:30:49.065691", "step": 4765, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.096877", "step": 4765, "epoch": 2 }, { "type": "loss", "content": 0.014968165196478367, "timestamp": "2025-09-10 02:30:49.098825", "step": 4766, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.134078", "step": 4766, "epoch": 2 }, { "type": "loss", "content": 0.0019150032894685864, "timestamp": "2025-09-10 02:30:49.136785", "step": 4767, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:49.170733", "step": 4767, "epoch": 2 }, { "type": "loss", "content": 0.016086271032691002, "timestamp": "2025-09-10 02:30:49.195400", "step": 4768, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.225205", "step": 4768, "epoch": 2 }, { "type": "loss", "content": 0.006012638099491596, "timestamp": "2025-09-10 02:30:49.227439", "step": 4769, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.262108", "step": 4769, "epoch": 2 }, { "type": "loss", "content": 0.002948788460344076, "timestamp": "2025-09-10 02:30:49.264416", "step": 4770, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:49.296867", "step": 4770, "epoch": 2 }, { "type": "loss", "content": 0.003266706829890609, "timestamp": "2025-09-10 02:30:49.299099", "step": 4771, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.328573", "step": 4771, "epoch": 2 }, { "type": "loss", "content": 0.03831540793180466, "timestamp": "2025-09-10 02:30:49.352343", "step": 4772, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.382695", "step": 4772, "epoch": 2 }, { "type": "loss", "content": 0.004111675079911947, "timestamp": "2025-09-10 02:30:49.384809", "step": 4773, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.414886", "step": 4773, "epoch": 2 }, { "type": "loss", "content": 0.006892775185406208, "timestamp": "2025-09-10 02:30:49.419889", "step": 4774, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.455414", "step": 4774, "epoch": 2 }, { "type": "loss", "content": 0.0016535267932340503, "timestamp": "2025-09-10 02:30:49.458273", "step": 4775, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.491932", "step": 4775, "epoch": 2 }, { "type": "loss", "content": 0.07653103768825531, "timestamp": "2025-09-10 02:30:49.520106", "step": 4776, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.553327", "step": 4776, "epoch": 2 }, { "type": "loss", "content": 0.009428937919437885, "timestamp": "2025-09-10 02:30:49.556060", "step": 4777, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.587052", "step": 4777, "epoch": 2 }, { "type": "loss", "content": 0.09126269817352295, "timestamp": "2025-09-10 02:30:49.589186", "step": 4778, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.618868", "step": 4778, "epoch": 2 }, { "type": "loss", "content": 0.028932299464941025, "timestamp": "2025-09-10 02:30:49.622162", "step": 4779, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:49.652698", "step": 4779, "epoch": 2 }, { "type": "loss", "content": 0.0021292385645210743, "timestamp": "2025-09-10 02:30:49.676942", "step": 4780, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.707757", "step": 4780, "epoch": 2 }, { "type": "loss", "content": 0.05218474194407463, "timestamp": "2025-09-10 02:30:49.710957", "step": 4781, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:49.741997", "step": 4781, "epoch": 2 }, { "type": "loss", "content": 0.026806095615029335, "timestamp": "2025-09-10 02:30:49.744949", "step": 4782, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:49.779894", "step": 4782, "epoch": 2 }, { "type": "loss", "content": 0.008151252754032612, "timestamp": "2025-09-10 02:30:49.781983", "step": 4783, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.810960", "step": 4783, "epoch": 2 }, { "type": "loss", "content": 0.03712259605526924, "timestamp": "2025-09-10 02:30:49.834467", "step": 4784, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:49.865568", "step": 4784, "epoch": 2 }, { "type": "loss", "content": 0.0027715428732335567, "timestamp": "2025-09-10 02:30:49.869335", "step": 4785, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.902393", "step": 4785, "epoch": 2 }, { "type": "loss", "content": 0.02292325720191002, "timestamp": "2025-09-10 02:30:49.904897", "step": 4786, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.935530", "step": 4786, "epoch": 2 }, { "type": "loss", "content": 0.0007113219471648335, "timestamp": "2025-09-10 02:30:49.940047", "step": 4787, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:49.969873", "step": 4787, "epoch": 2 }, { "type": "loss", "content": 0.014475691132247448, "timestamp": "2025-09-10 02:30:49.993371", "step": 4788, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.024696", "step": 4788, "epoch": 2 }, { "type": "loss", "content": 0.002319976920261979, "timestamp": "2025-09-10 02:30:50.032337", "step": 4789, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:50.062410", "step": 4789, "epoch": 2 }, { "type": "loss", "content": 0.032773204147815704, "timestamp": "2025-09-10 02:30:50.065676", "step": 4790, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.098058", "step": 4790, "epoch": 2 }, { "type": "loss", "content": 0.0004146102874074131, "timestamp": "2025-09-10 02:30:50.102498", "step": 4791, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.131632", "step": 4791, "epoch": 2 }, { "type": "loss", "content": 0.012024929746985435, "timestamp": "2025-09-10 02:30:50.154931", "step": 4792, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:50.184429", "step": 4792, "epoch": 2 }, { "type": "loss", "content": 0.00999387912452221, "timestamp": "2025-09-10 02:30:50.186142", "step": 4793, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:50.215440", "step": 4793, "epoch": 2 }, { "type": "loss", "content": 0.0016098637133836746, "timestamp": "2025-09-10 02:30:50.217605", "step": 4794, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.246943", "step": 4794, "epoch": 2 }, { "type": "loss", "content": 0.016725609079003334, "timestamp": "2025-09-10 02:30:50.248875", "step": 4795, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.277914", "step": 4795, "epoch": 2 }, { "type": "loss", "content": 0.01718818210065365, "timestamp": "2025-09-10 02:30:50.301528", "step": 4796, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.330987", "step": 4796, "epoch": 2 }, { "type": "loss", "content": 0.015896115452051163, "timestamp": "2025-09-10 02:30:50.333079", "step": 4797, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.362279", "step": 4797, "epoch": 2 }, { "type": "loss", "content": 0.002817119937390089, "timestamp": "2025-09-10 02:30:50.364114", "step": 4798, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.393871", "step": 4798, "epoch": 2 }, { "type": "loss", "content": 0.04349520429968834, "timestamp": "2025-09-10 02:30:50.395659", "step": 4799, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.424257", "step": 4799, "epoch": 2 }, { "type": "loss", "content": 0.001361390925012529, "timestamp": "2025-09-10 02:30:50.448987", "step": 4800, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.480165", "step": 4800, "epoch": 2 }, { "type": "loss", "content": 0.01079056691378355, "timestamp": "2025-09-10 02:30:50.483438", "step": 4801, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.512611", "step": 4801, "epoch": 2 }, { "type": "loss", "content": 0.002668142318725586, "timestamp": "2025-09-10 02:30:50.515076", "step": 4802, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.545561", "step": 4802, "epoch": 2 }, { "type": "loss", "content": 0.022520411759614944, "timestamp": "2025-09-10 02:30:50.547374", "step": 4803, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.576455", "step": 4803, "epoch": 2 }, { "type": "loss", "content": 0.010405519045889378, "timestamp": "2025-09-10 02:30:50.600396", "step": 4804, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.635884", "step": 4804, "epoch": 2 }, { "type": "loss", "content": 0.01020369678735733, "timestamp": "2025-09-10 02:30:50.637552", "step": 4805, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.666451", "step": 4805, "epoch": 2 }, { "type": "loss", "content": 0.006493265740573406, "timestamp": "2025-09-10 02:30:50.668314", "step": 4806, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.697063", "step": 4806, "epoch": 2 }, { "type": "loss", "content": 0.04450357332825661, "timestamp": "2025-09-10 02:30:50.698998", "step": 4807, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.728647", "step": 4807, "epoch": 2 }, { "type": "loss", "content": 0.00014824536629021168, "timestamp": "2025-09-10 02:30:50.752699", "step": 4808, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.781969", "step": 4808, "epoch": 2 }, { "type": "loss", "content": 0.013092207722365856, "timestamp": "2025-09-10 02:30:50.783988", "step": 4809, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:50.813679", "step": 4809, "epoch": 2 }, { "type": "loss", "content": 0.03313382714986801, "timestamp": "2025-09-10 02:30:50.815639", "step": 4810, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.844950", "step": 4810, "epoch": 2 }, { "type": "loss", "content": 0.006113601382821798, "timestamp": "2025-09-10 02:30:50.847416", "step": 4811, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.876334", "step": 4811, "epoch": 2 }, { "type": "loss", "content": 0.0017574252560734749, "timestamp": "2025-09-10 02:30:50.900001", "step": 4812, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:50.930074", "step": 4812, "epoch": 2 }, { "type": "loss", "content": 0.00816971156746149, "timestamp": "2025-09-10 02:30:50.932172", "step": 4813, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.960959", "step": 4813, "epoch": 2 }, { "type": "loss", "content": 0.000925055705010891, "timestamp": "2025-09-10 02:30:50.962899", "step": 4814, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:50.992328", "step": 4814, "epoch": 2 }, { "type": "loss", "content": 0.014087872579693794, "timestamp": "2025-09-10 02:30:50.995190", "step": 4815, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.023988", "step": 4815, "epoch": 2 }, { "type": "loss", "content": 0.0012622555950656533, "timestamp": "2025-09-10 02:30:51.047401", "step": 4816, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.078791", "step": 4816, "epoch": 2 }, { "type": "loss", "content": 0.020634431391954422, "timestamp": "2025-09-10 02:30:51.080583", "step": 4817, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.109384", "step": 4817, "epoch": 2 }, { "type": "loss", "content": 0.0009810065384954214, "timestamp": "2025-09-10 02:30:51.111223", "step": 4818, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:51.141359", "step": 4818, "epoch": 2 }, { "type": "loss", "content": 0.0026875983458012342, "timestamp": "2025-09-10 02:30:51.143427", "step": 4819, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.172694", "step": 4819, "epoch": 2 }, { "type": "loss", "content": 0.029066959396004677, "timestamp": "2025-09-10 02:30:51.196168", "step": 4820, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.227063", "step": 4820, "epoch": 2 }, { "type": "loss", "content": 0.00033089984208345413, "timestamp": "2025-09-10 02:30:51.230213", "step": 4821, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.259280", "step": 4821, "epoch": 2 }, { "type": "loss", "content": 0.00021650652342941612, "timestamp": "2025-09-10 02:30:51.261415", "step": 4822, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:51.290713", "step": 4822, "epoch": 2 }, { "type": "loss", "content": 0.0036471416242420673, "timestamp": "2025-09-10 02:30:51.292922", "step": 4823, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.321614", "step": 4823, "epoch": 2 }, { "type": "loss", "content": 0.0007608329760842025, "timestamp": "2025-09-10 02:30:51.344783", "step": 4824, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.373954", "step": 4824, "epoch": 2 }, { "type": "loss", "content": 0.002901230240240693, "timestamp": "2025-09-10 02:30:51.375868", "step": 4825, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.405008", "step": 4825, "epoch": 2 }, { "type": "loss", "content": 0.0002802798990160227, "timestamp": "2025-09-10 02:30:51.407983", "step": 4826, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:51.437467", "step": 4826, "epoch": 2 }, { "type": "loss", "content": 0.002789972350001335, "timestamp": "2025-09-10 02:30:51.439277", "step": 4827, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.468325", "step": 4827, "epoch": 2 }, { "type": "loss", "content": 0.00826730765402317, "timestamp": "2025-09-10 02:30:51.491634", "step": 4828, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.520646", "step": 4828, "epoch": 2 }, { "type": "loss", "content": 0.0010153782786801457, "timestamp": "2025-09-10 02:30:51.522743", "step": 4829, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.551519", "step": 4829, "epoch": 2 }, { "type": "loss", "content": 0.0037035089917480946, "timestamp": "2025-09-10 02:30:51.553437", "step": 4830, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.582877", "step": 4830, "epoch": 2 }, { "type": "loss", "content": 0.0038321143947541714, "timestamp": "2025-09-10 02:30:51.587180", "step": 4831, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.616536", "step": 4831, "epoch": 2 }, { "type": "loss", "content": 0.00136780203320086, "timestamp": "2025-09-10 02:30:51.639827", "step": 4832, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:51.671772", "step": 4832, "epoch": 2 }, { "type": "loss", "content": 0.0012906203046441078, "timestamp": "2025-09-10 02:30:51.673824", "step": 4833, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.703020", "step": 4833, "epoch": 2 }, { "type": "loss", "content": 0.006140691693872213, "timestamp": "2025-09-10 02:30:51.704864", "step": 4834, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.733578", "step": 4834, "epoch": 2 }, { "type": "loss", "content": 0.01278417557477951, "timestamp": "2025-09-10 02:30:51.735565", "step": 4835, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:51.765359", "step": 4835, "epoch": 2 }, { "type": "loss", "content": 0.01972927153110504, "timestamp": "2025-09-10 02:30:51.788949", "step": 4836, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.819553", "step": 4836, "epoch": 2 }, { "type": "loss", "content": 0.00920105166733265, "timestamp": "2025-09-10 02:30:51.821472", "step": 4837, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.850252", "step": 4837, "epoch": 2 }, { "type": "loss", "content": 0.000890783965587616, "timestamp": "2025-09-10 02:30:51.854010", "step": 4838, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.885129", "step": 4838, "epoch": 2 }, { "type": "loss", "content": 0.026027875021100044, "timestamp": "2025-09-10 02:30:51.887543", "step": 4839, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:51.916875", "step": 4839, "epoch": 2 }, { "type": "loss", "content": 0.0005559767014347017, "timestamp": "2025-09-10 02:30:51.943723", "step": 4840, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:51.973426", "step": 4840, "epoch": 2 }, { "type": "loss", "content": 0.016504112631082535, "timestamp": "2025-09-10 02:30:51.975972", "step": 4841, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.004996", "step": 4841, "epoch": 2 }, { "type": "loss", "content": 0.0019031076226383448, "timestamp": "2025-09-10 02:30:52.007277", "step": 4842, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.036419", "step": 4842, "epoch": 2 }, { "type": "loss", "content": 0.004857473075389862, "timestamp": "2025-09-10 02:30:52.039201", "step": 4843, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.068321", "step": 4843, "epoch": 2 }, { "type": "loss", "content": 0.00918328296393156, "timestamp": "2025-09-10 02:30:52.091986", "step": 4844, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.120957", "step": 4844, "epoch": 2 }, { "type": "loss", "content": 0.006627433467656374, "timestamp": "2025-09-10 02:30:52.122846", "step": 4845, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.151523", "step": 4845, "epoch": 2 }, { "type": "loss", "content": 0.033394183963537216, "timestamp": "2025-09-10 02:30:52.153667", "step": 4846, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.182443", "step": 4846, "epoch": 2 }, { "type": "loss", "content": 0.03488219529390335, "timestamp": "2025-09-10 02:30:52.184305", "step": 4847, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.212914", "step": 4847, "epoch": 2 }, { "type": "loss", "content": 0.0009966939687728882, "timestamp": "2025-09-10 02:30:52.236351", "step": 4848, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.265768", "step": 4848, "epoch": 2 }, { "type": "loss", "content": 0.001425236347131431, "timestamp": "2025-09-10 02:30:52.267834", "step": 4849, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.297224", "step": 4849, "epoch": 2 }, { "type": "loss", "content": 0.020108234137296677, "timestamp": "2025-09-10 02:30:52.299605", "step": 4850, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.328634", "step": 4850, "epoch": 2 }, { "type": "loss", "content": 0.0005182851455174387, "timestamp": "2025-09-10 02:30:52.330593", "step": 4851, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.359382", "step": 4851, "epoch": 2 }, { "type": "loss", "content": 0.0004465934180188924, "timestamp": "2025-09-10 02:30:52.382974", "step": 4852, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.412078", "step": 4852, "epoch": 2 }, { "type": "loss", "content": 0.0028146894183009863, "timestamp": "2025-09-10 02:30:52.417244", "step": 4853, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.447859", "step": 4853, "epoch": 2 }, { "type": "loss", "content": 0.005732808727771044, "timestamp": "2025-09-10 02:30:52.450177", "step": 4854, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.478948", "step": 4854, "epoch": 2 }, { "type": "loss", "content": 0.014171900227665901, "timestamp": "2025-09-10 02:30:52.480959", "step": 4855, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.510368", "step": 4855, "epoch": 2 }, { "type": "loss", "content": 0.04297295957803726, "timestamp": "2025-09-10 02:30:52.534030", "step": 4856, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.562966", "step": 4856, "epoch": 2 }, { "type": "loss", "content": 0.005833758972585201, "timestamp": "2025-09-10 02:30:52.564904", "step": 4857, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.594240", "step": 4857, "epoch": 2 }, { "type": "loss", "content": 0.002182360040023923, "timestamp": "2025-09-10 02:30:52.600049", "step": 4858, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.631390", "step": 4858, "epoch": 2 }, { "type": "loss", "content": 0.052217476069927216, "timestamp": "2025-09-10 02:30:52.633824", "step": 4859, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.663977", "step": 4859, "epoch": 2 }, { "type": "loss", "content": 0.0024717673659324646, "timestamp": "2025-09-10 02:30:52.687891", "step": 4860, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.716384", "step": 4860, "epoch": 2 }, { "type": "loss", "content": 0.01088649220764637, "timestamp": "2025-09-10 02:30:52.718366", "step": 4861, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.747372", "step": 4861, "epoch": 2 }, { "type": "loss", "content": 0.004765079822391272, "timestamp": "2025-09-10 02:30:52.749796", "step": 4862, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.778934", "step": 4862, "epoch": 2 }, { "type": "loss", "content": 0.013388590887188911, "timestamp": "2025-09-10 02:30:52.780659", "step": 4863, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:52.809592", "step": 4863, "epoch": 2 }, { "type": "loss", "content": 0.04290319234132767, "timestamp": "2025-09-10 02:30:52.833120", "step": 4864, "epoch": 2 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:30:54.729278", "step": 4864, "epoch": 2 }, { "type": "pplx", "content": 2178451.329655371, "timestamp": "2025-09-10 02:30:54.731373", "step": 4864, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:54.759681", "step": 4864, "epoch": 2 }, { "type": "loss", "content": 0.009877247735857964, "timestamp": "2025-09-10 02:30:54.762156", "step": 4865, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:54.791173", "step": 4865, "epoch": 2 }, { "type": "loss", "content": 0.015933571383357048, "timestamp": "2025-09-10 02:30:54.793230", "step": 4866, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:54.822657", "step": 4866, "epoch": 2 }, { "type": "loss", "content": 0.045118559151887894, "timestamp": "2025-09-10 02:30:54.824601", "step": 4867, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:54.853373", "step": 4867, "epoch": 2 }, { "type": "loss", "content": 0.01064248289912939, "timestamp": "2025-09-10 02:30:54.876950", "step": 4868, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:54.906466", "step": 4868, "epoch": 2 }, { "type": "loss", "content": 0.002190890721976757, "timestamp": "2025-09-10 02:30:54.910045", "step": 4869, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:54.940989", "step": 4869, "epoch": 2 }, { "type": "loss", "content": 0.004141589161008596, "timestamp": "2025-09-10 02:30:54.943081", "step": 4870, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:54.973037", "step": 4870, "epoch": 2 }, { "type": "loss", "content": 0.002087392844259739, "timestamp": "2025-09-10 02:30:54.975670", "step": 4871, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 1, 80 ], "flops": 593517404912 }, "timestamp": "2025-09-10 02:30:55.005230", "step": 4871, "epoch": 2 }, { "type": "loss", "content": 2.8176156774861738e-05, "timestamp": "2025-09-10 02:30:55.028664", "step": 4872, "epoch": 2 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.058654", "step": 4872, "epoch": 3 }, { "type": "loss", "content": 0.0014280019095167518, "timestamp": "2025-09-10 02:30:55.060873", "step": 4873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.090133", "step": 4873, "epoch": 3 }, { "type": "loss", "content": 0.001310416730120778, "timestamp": "2025-09-10 02:30:55.092153", "step": 4874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.120821", "step": 4874, "epoch": 3 }, { "type": "loss", "content": 0.012540838681161404, "timestamp": "2025-09-10 02:30:55.123011", "step": 4875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.151824", "step": 4875, "epoch": 3 }, { "type": "loss", "content": 0.003385176882147789, "timestamp": "2025-09-10 02:30:55.175461", "step": 4876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.204720", "step": 4876, "epoch": 3 }, { "type": "loss", "content": 0.017024511471390724, "timestamp": "2025-09-10 02:30:55.206968", "step": 4877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.235625", "step": 4877, "epoch": 3 }, { "type": "loss", "content": 0.0008043819689191878, "timestamp": "2025-09-10 02:30:55.237915", "step": 4878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.267047", "step": 4878, "epoch": 3 }, { "type": "loss", "content": 0.0031665521673858166, "timestamp": "2025-09-10 02:30:55.269039", "step": 4879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.297866", "step": 4879, "epoch": 3 }, { "type": "loss", "content": 0.009495946578681469, "timestamp": "2025-09-10 02:30:55.321615", "step": 4880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:55.350966", "step": 4880, "epoch": 3 }, { "type": "loss", "content": 0.005551033653318882, "timestamp": "2025-09-10 02:30:55.352782", "step": 4881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:55.381555", "step": 4881, "epoch": 3 }, { "type": "loss", "content": 0.010024656541645527, "timestamp": "2025-09-10 02:30:55.383607", "step": 4882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.412549", "step": 4882, "epoch": 3 }, { "type": "loss", "content": 0.02356908656656742, "timestamp": "2025-09-10 02:30:55.414568", "step": 4883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.443563", "step": 4883, "epoch": 3 }, { "type": "loss", "content": 0.005587874446064234, "timestamp": "2025-09-10 02:30:55.467302", "step": 4884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:55.496453", "step": 4884, "epoch": 3 }, { "type": "loss", "content": 0.005373778752982616, "timestamp": "2025-09-10 02:30:55.498428", "step": 4885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.527886", "step": 4885, "epoch": 3 }, { "type": "loss", "content": 0.008848587051033974, "timestamp": "2025-09-10 02:30:55.529851", "step": 4886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.558828", "step": 4886, "epoch": 3 }, { "type": "loss", "content": 0.005631915759295225, "timestamp": "2025-09-10 02:30:55.560887", "step": 4887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.589649", "step": 4887, "epoch": 3 }, { "type": "loss", "content": 0.008946587331593037, "timestamp": "2025-09-10 02:30:55.613157", "step": 4888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.642541", "step": 4888, "epoch": 3 }, { "type": "loss", "content": 0.00208521937020123, "timestamp": "2025-09-10 02:30:55.644607", "step": 4889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:55.673673", "step": 4889, "epoch": 3 }, { "type": "loss", "content": 0.0030278817284852266, "timestamp": "2025-09-10 02:30:55.675501", "step": 4890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.704385", "step": 4890, "epoch": 3 }, { "type": "loss", "content": 0.004253068007528782, "timestamp": "2025-09-10 02:30:55.706508", "step": 4891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.735543", "step": 4891, "epoch": 3 }, { "type": "loss", "content": 0.0020537772215902805, "timestamp": "2025-09-10 02:30:55.758995", "step": 4892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:55.788166", "step": 4892, "epoch": 3 }, { "type": "loss", "content": 0.0005098031833767891, "timestamp": "2025-09-10 02:30:55.790448", "step": 4893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.819035", "step": 4893, "epoch": 3 }, { "type": "loss", "content": 0.01048461813479662, "timestamp": "2025-09-10 02:30:55.820952", "step": 4894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.849814", "step": 4894, "epoch": 3 }, { "type": "loss", "content": 0.002517236163839698, "timestamp": "2025-09-10 02:30:55.851904", "step": 4895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.880610", "step": 4895, "epoch": 3 }, { "type": "loss", "content": 0.005761867854744196, "timestamp": "2025-09-10 02:30:55.903958", "step": 4896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.932854", "step": 4896, "epoch": 3 }, { "type": "loss", "content": 0.0038676075637340546, "timestamp": "2025-09-10 02:30:55.934891", "step": 4897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:55.963160", "step": 4897, "epoch": 3 }, { "type": "loss", "content": 0.0013028824469074607, "timestamp": "2025-09-10 02:30:55.965037", "step": 4898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:55.993837", "step": 4898, "epoch": 3 }, { "type": "loss", "content": 0.0014792155707255006, "timestamp": "2025-09-10 02:30:55.995574", "step": 4899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.024698", "step": 4899, "epoch": 3 }, { "type": "loss", "content": 0.00812798272818327, "timestamp": "2025-09-10 02:30:56.048091", "step": 4900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.077423", "step": 4900, "epoch": 3 }, { "type": "loss", "content": 9.349980246042833e-05, "timestamp": "2025-09-10 02:30:56.079610", "step": 4901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.108433", "step": 4901, "epoch": 3 }, { "type": "loss", "content": 0.0007435997831635177, "timestamp": "2025-09-10 02:30:56.110405", "step": 4902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.139003", "step": 4902, "epoch": 3 }, { "type": "loss", "content": 0.007078954018652439, "timestamp": "2025-09-10 02:30:56.141088", "step": 4903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.169951", "step": 4903, "epoch": 3 }, { "type": "loss", "content": 0.0019112270092591643, "timestamp": "2025-09-10 02:30:56.193664", "step": 4904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.223792", "step": 4904, "epoch": 3 }, { "type": "loss", "content": 0.0007984461262822151, "timestamp": "2025-09-10 02:30:56.225434", "step": 4905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:56.254567", "step": 4905, "epoch": 3 }, { "type": "loss", "content": 0.005893030669540167, "timestamp": "2025-09-10 02:30:56.256582", "step": 4906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.285719", "step": 4906, "epoch": 3 }, { "type": "loss", "content": 0.002062671585008502, "timestamp": "2025-09-10 02:30:56.288020", "step": 4907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.317047", "step": 4907, "epoch": 3 }, { "type": "loss", "content": 0.00011138491390738636, "timestamp": "2025-09-10 02:30:56.340758", "step": 4908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.370609", "step": 4908, "epoch": 3 }, { "type": "loss", "content": 0.0002839634835254401, "timestamp": "2025-09-10 02:30:56.372465", "step": 4909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.401909", "step": 4909, "epoch": 3 }, { "type": "loss", "content": 0.03709616884589195, "timestamp": "2025-09-10 02:30:56.403896", "step": 4910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.433071", "step": 4910, "epoch": 3 }, { "type": "loss", "content": 0.0009739446686580777, "timestamp": "2025-09-10 02:30:56.435179", "step": 4911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.464101", "step": 4911, "epoch": 3 }, { "type": "loss", "content": 0.004271974321454763, "timestamp": "2025-09-10 02:30:56.487694", "step": 4912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.516761", "step": 4912, "epoch": 3 }, { "type": "loss", "content": 0.0005912575870752335, "timestamp": "2025-09-10 02:30:56.518939", "step": 4913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:56.547837", "step": 4913, "epoch": 3 }, { "type": "loss", "content": 0.0004359797458164394, "timestamp": "2025-09-10 02:30:56.549922", "step": 4914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.579060", "step": 4914, "epoch": 3 }, { "type": "loss", "content": 0.020385054871439934, "timestamp": "2025-09-10 02:30:56.580913", "step": 4915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.609976", "step": 4915, "epoch": 3 }, { "type": "loss", "content": 0.01246044784784317, "timestamp": "2025-09-10 02:30:56.633692", "step": 4916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.663254", "step": 4916, "epoch": 3 }, { "type": "loss", "content": 0.0003972502890974283, "timestamp": "2025-09-10 02:30:56.665203", "step": 4917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:56.694697", "step": 4917, "epoch": 3 }, { "type": "loss", "content": 0.0024749203585088253, "timestamp": "2025-09-10 02:30:56.696842", "step": 4918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.725998", "step": 4918, "epoch": 3 }, { "type": "loss", "content": 0.00011896999785676599, "timestamp": "2025-09-10 02:30:56.727923", "step": 4919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.756735", "step": 4919, "epoch": 3 }, { "type": "loss", "content": 0.03590719774365425, "timestamp": "2025-09-10 02:30:56.780360", "step": 4920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.809418", "step": 4920, "epoch": 3 }, { "type": "loss", "content": 0.0033199347089976072, "timestamp": "2025-09-10 02:30:56.811523", "step": 4921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:56.841106", "step": 4921, "epoch": 3 }, { "type": "loss", "content": 0.0011541909771040082, "timestamp": "2025-09-10 02:30:56.843436", "step": 4922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:56.873352", "step": 4922, "epoch": 3 }, { "type": "loss", "content": 0.0016362580936402082, "timestamp": "2025-09-10 02:30:56.875605", "step": 4923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:56.905080", "step": 4923, "epoch": 3 }, { "type": "loss", "content": 0.0036181563045829535, "timestamp": "2025-09-10 02:30:56.928572", "step": 4924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:56.957967", "step": 4924, "epoch": 3 }, { "type": "loss", "content": 0.05081191286444664, "timestamp": "2025-09-10 02:30:56.959940", "step": 4925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:56.988879", "step": 4925, "epoch": 3 }, { "type": "loss", "content": 0.0005761417560279369, "timestamp": "2025-09-10 02:30:56.990870", "step": 4926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.019584", "step": 4926, "epoch": 3 }, { "type": "loss", "content": 0.0006865057512186468, "timestamp": "2025-09-10 02:30:57.021584", "step": 4927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:57.050505", "step": 4927, "epoch": 3 }, { "type": "loss", "content": 0.004511348903179169, "timestamp": "2025-09-10 02:30:57.074331", "step": 4928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.103454", "step": 4928, "epoch": 3 }, { "type": "loss", "content": 0.0009536277502775192, "timestamp": "2025-09-10 02:30:57.105672", "step": 4929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.135133", "step": 4929, "epoch": 3 }, { "type": "loss", "content": 0.00153729144949466, "timestamp": "2025-09-10 02:30:57.137441", "step": 4930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:57.167292", "step": 4930, "epoch": 3 }, { "type": "loss", "content": 0.0001375428691972047, "timestamp": "2025-09-10 02:30:57.169207", "step": 4931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.197808", "step": 4931, "epoch": 3 }, { "type": "loss", "content": 0.009686694480478764, "timestamp": "2025-09-10 02:30:57.221513", "step": 4932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.250724", "step": 4932, "epoch": 3 }, { "type": "loss", "content": 0.00025545855169184506, "timestamp": "2025-09-10 02:30:57.252677", "step": 4933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:57.281325", "step": 4933, "epoch": 3 }, { "type": "loss", "content": 0.02290979214012623, "timestamp": "2025-09-10 02:30:57.283327", "step": 4934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.312391", "step": 4934, "epoch": 3 }, { "type": "loss", "content": 0.0005374342435970902, "timestamp": "2025-09-10 02:30:57.314372", "step": 4935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.343458", "step": 4935, "epoch": 3 }, { "type": "loss", "content": 0.00914563238620758, "timestamp": "2025-09-10 02:30:57.367320", "step": 4936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.396425", "step": 4936, "epoch": 3 }, { "type": "loss", "content": 0.008263180032372475, "timestamp": "2025-09-10 02:30:57.398370", "step": 4937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.427491", "step": 4937, "epoch": 3 }, { "type": "loss", "content": 0.0008246535435318947, "timestamp": "2025-09-10 02:30:57.429573", "step": 4938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.460520", "step": 4938, "epoch": 3 }, { "type": "loss", "content": 0.004253962077200413, "timestamp": "2025-09-10 02:30:57.462435", "step": 4939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.492606", "step": 4939, "epoch": 3 }, { "type": "loss", "content": 0.0006401669816114008, "timestamp": "2025-09-10 02:30:57.516326", "step": 4940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:57.545816", "step": 4940, "epoch": 3 }, { "type": "loss", "content": 0.05252986028790474, "timestamp": "2025-09-10 02:30:57.547685", "step": 4941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.576910", "step": 4941, "epoch": 3 }, { "type": "loss", "content": 0.0003547283704392612, "timestamp": "2025-09-10 02:30:57.579043", "step": 4942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.608108", "step": 4942, "epoch": 3 }, { "type": "loss", "content": 0.00021801998082082719, "timestamp": "2025-09-10 02:30:57.610139", "step": 4943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:57.639729", "step": 4943, "epoch": 3 }, { "type": "loss", "content": 0.006962130311876535, "timestamp": "2025-09-10 02:30:57.663675", "step": 4944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.693492", "step": 4944, "epoch": 3 }, { "type": "loss", "content": 0.011197717860341072, "timestamp": "2025-09-10 02:30:57.695777", "step": 4945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.724854", "step": 4945, "epoch": 3 }, { "type": "loss", "content": 0.005284208804368973, "timestamp": "2025-09-10 02:30:57.726966", "step": 4946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:57.760765", "step": 4946, "epoch": 3 }, { "type": "loss", "content": 0.05076735466718674, "timestamp": "2025-09-10 02:30:57.762679", "step": 4947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.791881", "step": 4947, "epoch": 3 }, { "type": "loss", "content": 0.0007167586009018123, "timestamp": "2025-09-10 02:30:57.815453", "step": 4948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.847529", "step": 4948, "epoch": 3 }, { "type": "loss", "content": 0.000604218163061887, "timestamp": "2025-09-10 02:30:57.850359", "step": 4949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.884073", "step": 4949, "epoch": 3 }, { "type": "loss", "content": 0.011483556590974331, "timestamp": "2025-09-10 02:30:57.886684", "step": 4950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:57.921376", "step": 4950, "epoch": 3 }, { "type": "loss", "content": 0.01305474154651165, "timestamp": "2025-09-10 02:30:57.926067", "step": 4951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:57.961102", "step": 4951, "epoch": 3 }, { "type": "loss", "content": 0.00013778348511550575, "timestamp": "2025-09-10 02:30:57.984516", "step": 4952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:30:58.013208", "step": 4952, "epoch": 3 }, { "type": "loss", "content": 0.00018441618885844946, "timestamp": "2025-09-10 02:30:58.018947", "step": 4953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.047736", "step": 4953, "epoch": 3 }, { "type": "loss", "content": 0.05111263319849968, "timestamp": "2025-09-10 02:30:58.049585", "step": 4954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.078583", "step": 4954, "epoch": 3 }, { "type": "loss", "content": 0.0012449711794033647, "timestamp": "2025-09-10 02:30:58.080957", "step": 4955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.110094", "step": 4955, "epoch": 3 }, { "type": "loss", "content": 0.009258122183382511, "timestamp": "2025-09-10 02:30:58.133899", "step": 4956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.163735", "step": 4956, "epoch": 3 }, { "type": "loss", "content": 0.002155735855922103, "timestamp": "2025-09-10 02:30:58.166012", "step": 4957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.195004", "step": 4957, "epoch": 3 }, { "type": "loss", "content": 0.0022412240505218506, "timestamp": "2025-09-10 02:30:58.196950", "step": 4958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.225486", "step": 4958, "epoch": 3 }, { "type": "loss", "content": 0.00033534460817463696, "timestamp": "2025-09-10 02:30:58.227275", "step": 4959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.256020", "step": 4959, "epoch": 3 }, { "type": "loss", "content": 0.0064600491896271706, "timestamp": "2025-09-10 02:30:58.280119", "step": 4960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.310184", "step": 4960, "epoch": 3 }, { "type": "loss", "content": 0.0005327347898855805, "timestamp": "2025-09-10 02:30:58.312268", "step": 4961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:58.341323", "step": 4961, "epoch": 3 }, { "type": "loss", "content": 0.0001961165980901569, "timestamp": "2025-09-10 02:30:58.343444", "step": 4962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:58.372501", "step": 4962, "epoch": 3 }, { "type": "loss", "content": 0.0003622942604124546, "timestamp": "2025-09-10 02:30:58.374462", "step": 4963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.403498", "step": 4963, "epoch": 3 }, { "type": "loss", "content": 0.0043032411485910416, "timestamp": "2025-09-10 02:30:58.427006", "step": 4964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.456323", "step": 4964, "epoch": 3 }, { "type": "loss", "content": 0.004201345145702362, "timestamp": "2025-09-10 02:30:58.458489", "step": 4965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.487343", "step": 4965, "epoch": 3 }, { "type": "loss", "content": 0.012078885920345783, "timestamp": "2025-09-10 02:30:58.489343", "step": 4966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.518236", "step": 4966, "epoch": 3 }, { "type": "loss", "content": 0.037341173738241196, "timestamp": "2025-09-10 02:30:58.520124", "step": 4967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.549013", "step": 4967, "epoch": 3 }, { "type": "loss", "content": 0.001226257300004363, "timestamp": "2025-09-10 02:30:58.572606", "step": 4968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:58.602390", "step": 4968, "epoch": 3 }, { "type": "loss", "content": 0.0004969441215507686, "timestamp": "2025-09-10 02:30:58.604432", "step": 4969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.633771", "step": 4969, "epoch": 3 }, { "type": "loss", "content": 8.056405931711197e-05, "timestamp": "2025-09-10 02:30:58.635845", "step": 4970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.665133", "step": 4970, "epoch": 3 }, { "type": "loss", "content": 0.0022159700747579336, "timestamp": "2025-09-10 02:30:58.667187", "step": 4971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:58.695843", "step": 4971, "epoch": 3 }, { "type": "loss", "content": 0.000437339476775378, "timestamp": "2025-09-10 02:30:58.719632", "step": 4972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.751437", "step": 4972, "epoch": 3 }, { "type": "loss", "content": 0.0015226582763716578, "timestamp": "2025-09-10 02:30:58.753557", "step": 4973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.783053", "step": 4973, "epoch": 3 }, { "type": "loss", "content": 0.009864314459264278, "timestamp": "2025-09-10 02:30:58.785177", "step": 4974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:30:58.814421", "step": 4974, "epoch": 3 }, { "type": "loss", "content": 0.014232958666980267, "timestamp": "2025-09-10 02:30:58.816302", "step": 4975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.845790", "step": 4975, "epoch": 3 }, { "type": "loss", "content": 0.001106222509406507, "timestamp": "2025-09-10 02:30:58.869502", "step": 4976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:58.899119", "step": 4976, "epoch": 3 }, { "type": "loss", "content": 0.00033264182275161147, "timestamp": "2025-09-10 02:30:58.901275", "step": 4977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.931059", "step": 4977, "epoch": 3 }, { "type": "loss", "content": 0.00046894553815945983, "timestamp": "2025-09-10 02:30:58.933069", "step": 4978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.962008", "step": 4978, "epoch": 3 }, { "type": "loss", "content": 0.001208845293149352, "timestamp": "2025-09-10 02:30:58.964084", "step": 4979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:58.993135", "step": 4979, "epoch": 3 }, { "type": "loss", "content": 0.006808232516050339, "timestamp": "2025-09-10 02:30:59.017447", "step": 4980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.046549", "step": 4980, "epoch": 3 }, { "type": "loss", "content": 0.00014657301653642207, "timestamp": "2025-09-10 02:30:59.048548", "step": 4981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.077633", "step": 4981, "epoch": 3 }, { "type": "loss", "content": 0.07903604209423065, "timestamp": "2025-09-10 02:30:59.079893", "step": 4982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:59.108814", "step": 4982, "epoch": 3 }, { "type": "loss", "content": 0.031232083216309547, "timestamp": "2025-09-10 02:30:59.110677", "step": 4983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.139020", "step": 4983, "epoch": 3 }, { "type": "loss", "content": 0.0011663326295092702, "timestamp": "2025-09-10 02:30:59.162732", "step": 4984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.191585", "step": 4984, "epoch": 3 }, { "type": "loss", "content": 0.016943303868174553, "timestamp": "2025-09-10 02:30:59.193540", "step": 4985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:59.222314", "step": 4985, "epoch": 3 }, { "type": "loss", "content": 0.0015794998034834862, "timestamp": "2025-09-10 02:30:59.224229", "step": 4986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.253125", "step": 4986, "epoch": 3 }, { "type": "loss", "content": 0.03883078321814537, "timestamp": "2025-09-10 02:30:59.255095", "step": 4987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.283483", "step": 4987, "epoch": 3 }, { "type": "loss", "content": 0.00023402106307912618, "timestamp": "2025-09-10 02:30:59.306726", "step": 4988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:59.335306", "step": 4988, "epoch": 3 }, { "type": "loss", "content": 0.005712156184017658, "timestamp": "2025-09-10 02:30:59.337494", "step": 4989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.367008", "step": 4989, "epoch": 3 }, { "type": "loss", "content": 0.000885231769643724, "timestamp": "2025-09-10 02:30:59.368900", "step": 4990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.397889", "step": 4990, "epoch": 3 }, { "type": "loss", "content": 0.00021244284289423376, "timestamp": "2025-09-10 02:30:59.399841", "step": 4991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:30:59.428952", "step": 4991, "epoch": 3 }, { "type": "loss", "content": 0.009885640814900398, "timestamp": "2025-09-10 02:30:59.452533", "step": 4992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.481548", "step": 4992, "epoch": 3 }, { "type": "loss", "content": 0.00021031063806731254, "timestamp": "2025-09-10 02:30:59.484064", "step": 4993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.512613", "step": 4993, "epoch": 3 }, { "type": "loss", "content": 0.003944438882172108, "timestamp": "2025-09-10 02:30:59.514656", "step": 4994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.543447", "step": 4994, "epoch": 3 }, { "type": "loss", "content": 0.0002700241457205266, "timestamp": "2025-09-10 02:30:59.545555", "step": 4995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.574274", "step": 4995, "epoch": 3 }, { "type": "loss", "content": 0.0028411434032022953, "timestamp": "2025-09-10 02:30:59.597723", "step": 4996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.626826", "step": 4996, "epoch": 3 }, { "type": "loss", "content": 0.0009857058757916093, "timestamp": "2025-09-10 02:30:59.630428", "step": 4997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.661124", "step": 4997, "epoch": 3 }, { "type": "loss", "content": 0.0003661169612314552, "timestamp": "2025-09-10 02:30:59.663116", "step": 4998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.691888", "step": 4998, "epoch": 3 }, { "type": "loss", "content": 0.0011222055181860924, "timestamp": "2025-09-10 02:30:59.694013", "step": 4999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:30:59.722885", "step": 4999, "epoch": 3 }, { "type": "loss", "content": 0.023360345512628555, "timestamp": "2025-09-10 02:30:59.747175", "step": 5000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 5000", "timestamp": "2025-09-10 02:31:04.185902", "step": 5000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.225251", "step": 5000, "epoch": 3 }, { "type": "loss", "content": 0.004800345283001661, "timestamp": "2025-09-10 02:31:04.227303", "step": 5001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.259228", "step": 5001, "epoch": 3 }, { "type": "loss", "content": 0.00048084885929711163, "timestamp": "2025-09-10 02:31:04.261214", "step": 5002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:04.290803", "step": 5002, "epoch": 3 }, { "type": "loss", "content": 0.007763330824673176, "timestamp": "2025-09-10 02:31:04.292958", "step": 5003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.322053", "step": 5003, "epoch": 3 }, { "type": "loss", "content": 0.01378608588129282, "timestamp": "2025-09-10 02:31:04.346130", "step": 5004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.374942", "step": 5004, "epoch": 3 }, { "type": "loss", "content": 0.0037001348100602627, "timestamp": "2025-09-10 02:31:04.378119", "step": 5005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.408073", "step": 5005, "epoch": 3 }, { "type": "loss", "content": 0.00013897249300498515, "timestamp": "2025-09-10 02:31:04.410564", "step": 5006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.440401", "step": 5006, "epoch": 3 }, { "type": "loss", "content": 0.0008228147053159773, "timestamp": "2025-09-10 02:31:04.442584", "step": 5007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.473912", "step": 5007, "epoch": 3 }, { "type": "loss", "content": 0.027694018557667732, "timestamp": "2025-09-10 02:31:04.501495", "step": 5008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:04.537724", "step": 5008, "epoch": 3 }, { "type": "loss", "content": 0.0046686953864991665, "timestamp": "2025-09-10 02:31:04.539612", "step": 5009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.568994", "step": 5009, "epoch": 3 }, { "type": "loss", "content": 0.00345020298846066, "timestamp": "2025-09-10 02:31:04.571508", "step": 5010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:04.601580", "step": 5010, "epoch": 3 }, { "type": "loss", "content": 0.00317819113843143, "timestamp": "2025-09-10 02:31:04.603608", "step": 5011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.636356", "step": 5011, "epoch": 3 }, { "type": "loss", "content": 0.0029954214114695787, "timestamp": "2025-09-10 02:31:04.660122", "step": 5012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.689605", "step": 5012, "epoch": 3 }, { "type": "loss", "content": 0.003420087741687894, "timestamp": "2025-09-10 02:31:04.692847", "step": 5013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.721329", "step": 5013, "epoch": 3 }, { "type": "loss", "content": 0.002578622894361615, "timestamp": "2025-09-10 02:31:04.723487", "step": 5014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.756859", "step": 5014, "epoch": 3 }, { "type": "loss", "content": 0.00044571320177055895, "timestamp": "2025-09-10 02:31:04.763216", "step": 5015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:04.794631", "step": 5015, "epoch": 3 }, { "type": "loss", "content": 0.013255941681563854, "timestamp": "2025-09-10 02:31:04.818710", "step": 5016, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:31:06.850304", "step": 5016, "epoch": 3 }, { "type": "pplx", "content": 2010640.6963496492, "timestamp": "2025-09-10 02:31:06.852476", "step": 5016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:06.880570", "step": 5016, "epoch": 3 }, { "type": "loss", "content": 0.014487654902040958, "timestamp": "2025-09-10 02:31:06.883849", "step": 5017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:06.914895", "step": 5017, "epoch": 3 }, { "type": "loss", "content": 0.0020964504219591618, "timestamp": "2025-09-10 02:31:06.916938", "step": 5018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:06.945872", "step": 5018, "epoch": 3 }, { "type": "loss", "content": 0.0005403442191891372, "timestamp": "2025-09-10 02:31:06.947929", "step": 5019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:06.976977", "step": 5019, "epoch": 3 }, { "type": "loss", "content": 0.00035207762266509235, "timestamp": "2025-09-10 02:31:07.000788", "step": 5020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.031718", "step": 5020, "epoch": 3 }, { "type": "loss", "content": 0.015816517174243927, "timestamp": "2025-09-10 02:31:07.033653", "step": 5021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.062374", "step": 5021, "epoch": 3 }, { "type": "loss", "content": 0.0006769512547180057, "timestamp": "2025-09-10 02:31:07.064332", "step": 5022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:07.094912", "step": 5022, "epoch": 3 }, { "type": "loss", "content": 0.0006116781732998788, "timestamp": "2025-09-10 02:31:07.099658", "step": 5023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.130903", "step": 5023, "epoch": 3 }, { "type": "loss", "content": 0.001322591444477439, "timestamp": "2025-09-10 02:31:07.154227", "step": 5024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.183081", "step": 5024, "epoch": 3 }, { "type": "loss", "content": 0.011232644319534302, "timestamp": "2025-09-10 02:31:07.185022", "step": 5025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.213389", "step": 5025, "epoch": 3 }, { "type": "loss", "content": 0.007375881541520357, "timestamp": "2025-09-10 02:31:07.215298", "step": 5026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:07.244311", "step": 5026, "epoch": 3 }, { "type": "loss", "content": 9.760600369190797e-05, "timestamp": "2025-09-10 02:31:07.246122", "step": 5027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.274692", "step": 5027, "epoch": 3 }, { "type": "loss", "content": 0.006258365698158741, "timestamp": "2025-09-10 02:31:07.297937", "step": 5028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:07.330179", "step": 5028, "epoch": 3 }, { "type": "loss", "content": 0.000321950443321839, "timestamp": "2025-09-10 02:31:07.337302", "step": 5029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:07.371440", "step": 5029, "epoch": 3 }, { "type": "loss", "content": 0.00030036564567126334, "timestamp": "2025-09-10 02:31:07.373461", "step": 5030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.404325", "step": 5030, "epoch": 3 }, { "type": "loss", "content": 0.00012984655040781945, "timestamp": "2025-09-10 02:31:07.408412", "step": 5031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.442315", "step": 5031, "epoch": 3 }, { "type": "loss", "content": 0.0006678973441012204, "timestamp": "2025-09-10 02:31:07.465816", "step": 5032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.499085", "step": 5032, "epoch": 3 }, { "type": "loss", "content": 0.00010896347521338612, "timestamp": "2025-09-10 02:31:07.500935", "step": 5033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.532984", "step": 5033, "epoch": 3 }, { "type": "loss", "content": 0.0009329610620625317, "timestamp": "2025-09-10 02:31:07.534984", "step": 5034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.564448", "step": 5034, "epoch": 3 }, { "type": "loss", "content": 0.000349718815414235, "timestamp": "2025-09-10 02:31:07.566494", "step": 5035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.595493", "step": 5035, "epoch": 3 }, { "type": "loss", "content": 0.000719129282515496, "timestamp": "2025-09-10 02:31:07.619043", "step": 5036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:07.648144", "step": 5036, "epoch": 3 }, { "type": "loss", "content": 0.00318794883787632, "timestamp": "2025-09-10 02:31:07.650266", "step": 5037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:07.682288", "step": 5037, "epoch": 3 }, { "type": "loss", "content": 0.004861722234636545, "timestamp": "2025-09-10 02:31:07.683836", "step": 5038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.712365", "step": 5038, "epoch": 3 }, { "type": "loss", "content": 0.00029474907205440104, "timestamp": "2025-09-10 02:31:07.714293", "step": 5039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:07.743357", "step": 5039, "epoch": 3 }, { "type": "loss", "content": 0.0002755603345576674, "timestamp": "2025-09-10 02:31:07.766662", "step": 5040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:07.796324", "step": 5040, "epoch": 3 }, { "type": "loss", "content": 0.011947492137551308, "timestamp": "2025-09-10 02:31:07.798201", "step": 5041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.828617", "step": 5041, "epoch": 3 }, { "type": "loss", "content": 0.0006149975233711302, "timestamp": "2025-09-10 02:31:07.838691", "step": 5042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.868109", "step": 5042, "epoch": 3 }, { "type": "loss", "content": 0.00019832760153803974, "timestamp": "2025-09-10 02:31:07.870127", "step": 5043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:07.900565", "step": 5043, "epoch": 3 }, { "type": "loss", "content": 0.009482159279286861, "timestamp": "2025-09-10 02:31:07.925916", "step": 5044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.958591", "step": 5044, "epoch": 3 }, { "type": "loss", "content": 0.00039480425766669214, "timestamp": "2025-09-10 02:31:07.960939", "step": 5045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:07.990159", "step": 5045, "epoch": 3 }, { "type": "loss", "content": 0.0005279025062918663, "timestamp": "2025-09-10 02:31:07.994009", "step": 5046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.025986", "step": 5046, "epoch": 3 }, { "type": "loss", "content": 0.00013250982738099992, "timestamp": "2025-09-10 02:31:08.027773", "step": 5047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.056314", "step": 5047, "epoch": 3 }, { "type": "loss", "content": 0.000527249532751739, "timestamp": "2025-09-10 02:31:08.079740", "step": 5048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.108209", "step": 5048, "epoch": 3 }, { "type": "loss", "content": 0.00014435425691772252, "timestamp": "2025-09-10 02:31:08.110256", "step": 5049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.138569", "step": 5049, "epoch": 3 }, { "type": "loss", "content": 0.0002105718303937465, "timestamp": "2025-09-10 02:31:08.140147", "step": 5050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.169702", "step": 5050, "epoch": 3 }, { "type": "loss", "content": 0.0002794148458633572, "timestamp": "2025-09-10 02:31:08.171592", "step": 5051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.200301", "step": 5051, "epoch": 3 }, { "type": "loss", "content": 0.0003587789833545685, "timestamp": "2025-09-10 02:31:08.223616", "step": 5052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:08.260027", "step": 5052, "epoch": 3 }, { "type": "loss", "content": 0.0004125781706534326, "timestamp": "2025-09-10 02:31:08.262186", "step": 5053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.291590", "step": 5053, "epoch": 3 }, { "type": "loss", "content": 0.0006383904255926609, "timestamp": "2025-09-10 02:31:08.295424", "step": 5054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.324982", "step": 5054, "epoch": 3 }, { "type": "loss", "content": 0.00018965710478369147, "timestamp": "2025-09-10 02:31:08.326784", "step": 5055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:08.355129", "step": 5055, "epoch": 3 }, { "type": "loss", "content": 0.0001069796271622181, "timestamp": "2025-09-10 02:31:08.379819", "step": 5056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.408780", "step": 5056, "epoch": 3 }, { "type": "loss", "content": 0.008336128666996956, "timestamp": "2025-09-10 02:31:08.410834", "step": 5057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.439485", "step": 5057, "epoch": 3 }, { "type": "loss", "content": 0.0013685652520507574, "timestamp": "2025-09-10 02:31:08.441502", "step": 5058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.469981", "step": 5058, "epoch": 3 }, { "type": "loss", "content": 0.0024607598315924406, "timestamp": "2025-09-10 02:31:08.472033", "step": 5059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.501167", "step": 5059, "epoch": 3 }, { "type": "loss", "content": 0.0005927715683355927, "timestamp": "2025-09-10 02:31:08.524742", "step": 5060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.555430", "step": 5060, "epoch": 3 }, { "type": "loss", "content": 0.0002251004771096632, "timestamp": "2025-09-10 02:31:08.557287", "step": 5061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.591780", "step": 5061, "epoch": 3 }, { "type": "loss", "content": 0.0005563314771279693, "timestamp": "2025-09-10 02:31:08.594109", "step": 5062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.622772", "step": 5062, "epoch": 3 }, { "type": "loss", "content": 0.0013570614391937852, "timestamp": "2025-09-10 02:31:08.624570", "step": 5063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.653133", "step": 5063, "epoch": 3 }, { "type": "loss", "content": 0.0001732300443109125, "timestamp": "2025-09-10 02:31:08.676428", "step": 5064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.705689", "step": 5064, "epoch": 3 }, { "type": "loss", "content": 0.00969749130308628, "timestamp": "2025-09-10 02:31:08.707644", "step": 5065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.736460", "step": 5065, "epoch": 3 }, { "type": "loss", "content": 0.00025736287352629006, "timestamp": "2025-09-10 02:31:08.738393", "step": 5066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.771364", "step": 5066, "epoch": 3 }, { "type": "loss", "content": 0.002629172755405307, "timestamp": "2025-09-10 02:31:08.774514", "step": 5067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.806106", "step": 5067, "epoch": 3 }, { "type": "loss", "content": 0.0007719130371697247, "timestamp": "2025-09-10 02:31:08.829450", "step": 5068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:08.858879", "step": 5068, "epoch": 3 }, { "type": "loss", "content": 0.0004081004299223423, "timestamp": "2025-09-10 02:31:08.861702", "step": 5069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.890626", "step": 5069, "epoch": 3 }, { "type": "loss", "content": 0.006041979882866144, "timestamp": "2025-09-10 02:31:08.892532", "step": 5070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.920999", "step": 5070, "epoch": 3 }, { "type": "loss", "content": 0.002837598556652665, "timestamp": "2025-09-10 02:31:08.922989", "step": 5071, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:08.959416", "step": 5071, "epoch": 3 }, { "type": "loss", "content": 0.0034873499535024166, "timestamp": "2025-09-10 02:31:08.983717", "step": 5072, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.013499", "step": 5072, "epoch": 3 }, { "type": "loss", "content": 0.0001958427019417286, "timestamp": "2025-09-10 02:31:09.017855", "step": 5073, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:09.066465", "step": 5073, "epoch": 3 }, { "type": "loss", "content": 0.002457441296428442, "timestamp": "2025-09-10 02:31:09.071913", "step": 5074, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.107319", "step": 5074, "epoch": 3 }, { "type": "loss", "content": 0.029114514589309692, "timestamp": "2025-09-10 02:31:09.110183", "step": 5075, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.139123", "step": 5075, "epoch": 3 }, { "type": "loss", "content": 0.0008781243814155459, "timestamp": "2025-09-10 02:31:09.162593", "step": 5076, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.191139", "step": 5076, "epoch": 3 }, { "type": "loss", "content": 0.0006751882610842586, "timestamp": "2025-09-10 02:31:09.193204", "step": 5077, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.221614", "step": 5077, "epoch": 3 }, { "type": "loss", "content": 0.0052112252451479435, "timestamp": "2025-09-10 02:31:09.223384", "step": 5078, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.252144", "step": 5078, "epoch": 3 }, { "type": "loss", "content": 0.02491091564297676, "timestamp": "2025-09-10 02:31:09.255062", "step": 5079, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.285277", "step": 5079, "epoch": 3 }, { "type": "loss", "content": 0.03170705586671829, "timestamp": "2025-09-10 02:31:09.308787", "step": 5080, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.344060", "step": 5080, "epoch": 3 }, { "type": "loss", "content": 0.00020365203090477735, "timestamp": "2025-09-10 02:31:09.345952", "step": 5081, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.374932", "step": 5081, "epoch": 3 }, { "type": "loss", "content": 0.0001544384576845914, "timestamp": "2025-09-10 02:31:09.378225", "step": 5082, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.409239", "step": 5082, "epoch": 3 }, { "type": "loss", "content": 0.019298970699310303, "timestamp": "2025-09-10 02:31:09.411028", "step": 5083, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.439527", "step": 5083, "epoch": 3 }, { "type": "loss", "content": 0.0001749621151247993, "timestamp": "2025-09-10 02:31:09.462809", "step": 5084, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:09.491707", "step": 5084, "epoch": 3 }, { "type": "loss", "content": 0.000501964648719877, "timestamp": "2025-09-10 02:31:09.493501", "step": 5085, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.521884", "step": 5085, "epoch": 3 }, { "type": "loss", "content": 0.0014790799468755722, "timestamp": "2025-09-10 02:31:09.523935", "step": 5086, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:09.552743", "step": 5086, "epoch": 3 }, { "type": "loss", "content": 0.0002753040171228349, "timestamp": "2025-09-10 02:31:09.554508", "step": 5087, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.583298", "step": 5087, "epoch": 3 }, { "type": "loss", "content": 0.002824705094099045, "timestamp": "2025-09-10 02:31:09.607020", "step": 5088, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.635865", "step": 5088, "epoch": 3 }, { "type": "loss", "content": 0.0007388076628558338, "timestamp": "2025-09-10 02:31:09.638008", "step": 5089, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.668691", "step": 5089, "epoch": 3 }, { "type": "loss", "content": 0.0011144352611154318, "timestamp": "2025-09-10 02:31:09.670409", "step": 5090, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.699367", "step": 5090, "epoch": 3 }, { "type": "loss", "content": 0.01142920646816492, "timestamp": "2025-09-10 02:31:09.702213", "step": 5091, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.731067", "step": 5091, "epoch": 3 }, { "type": "loss", "content": 0.06258725374937057, "timestamp": "2025-09-10 02:31:09.754622", "step": 5092, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:09.784167", "step": 5092, "epoch": 3 }, { "type": "loss", "content": 0.0002580071159172803, "timestamp": "2025-09-10 02:31:09.786122", "step": 5093, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.815970", "step": 5093, "epoch": 3 }, { "type": "loss", "content": 7.947625999804586e-05, "timestamp": "2025-09-10 02:31:09.819740", "step": 5094, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.851858", "step": 5094, "epoch": 3 }, { "type": "loss", "content": 0.012915769591927528, "timestamp": "2025-09-10 02:31:09.853989", "step": 5095, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.887870", "step": 5095, "epoch": 3 }, { "type": "loss", "content": 0.040144938975572586, "timestamp": "2025-09-10 02:31:09.911380", "step": 5096, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:09.940619", "step": 5096, "epoch": 3 }, { "type": "loss", "content": 0.05270438641309738, "timestamp": "2025-09-10 02:31:09.942549", "step": 5097, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:09.974571", "step": 5097, "epoch": 3 }, { "type": "loss", "content": 0.02041652984917164, "timestamp": "2025-09-10 02:31:09.976535", "step": 5098, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:10.009654", "step": 5098, "epoch": 3 }, { "type": "loss", "content": 0.016050977632403374, "timestamp": "2025-09-10 02:31:10.012133", "step": 5099, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.051437", "step": 5099, "epoch": 3 }, { "type": "loss", "content": 0.001969614764675498, "timestamp": "2025-09-10 02:31:10.075030", "step": 5100, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:10.104149", "step": 5100, "epoch": 3 }, { "type": "loss", "content": 0.0016167467692866921, "timestamp": "2025-09-10 02:31:10.106029", "step": 5101, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.134850", "step": 5101, "epoch": 3 }, { "type": "loss", "content": 0.0035023626405745745, "timestamp": "2025-09-10 02:31:10.136730", "step": 5102, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.167536", "step": 5102, "epoch": 3 }, { "type": "loss", "content": 0.0002871211036108434, "timestamp": "2025-09-10 02:31:10.170108", "step": 5103, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:10.202275", "step": 5103, "epoch": 3 }, { "type": "loss", "content": 0.00033675372833386064, "timestamp": "2025-09-10 02:31:10.225964", "step": 5104, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.259639", "step": 5104, "epoch": 3 }, { "type": "loss", "content": 0.008695071563124657, "timestamp": "2025-09-10 02:31:10.263836", "step": 5105, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.292888", "step": 5105, "epoch": 3 }, { "type": "loss", "content": 0.017227793112397194, "timestamp": "2025-09-10 02:31:10.294960", "step": 5106, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:10.324198", "step": 5106, "epoch": 3 }, { "type": "loss", "content": 0.0003940062306355685, "timestamp": "2025-09-10 02:31:10.326225", "step": 5107, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.355060", "step": 5107, "epoch": 3 }, { "type": "loss", "content": 0.0002077590033877641, "timestamp": "2025-09-10 02:31:10.378658", "step": 5108, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:10.409881", "step": 5108, "epoch": 3 }, { "type": "loss", "content": 0.0076890503987669945, "timestamp": "2025-09-10 02:31:10.412865", "step": 5109, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.441878", "step": 5109, "epoch": 3 }, { "type": "loss", "content": 0.022206833586096764, "timestamp": "2025-09-10 02:31:10.444831", "step": 5110, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.476159", "step": 5110, "epoch": 3 }, { "type": "loss", "content": 0.008559671230614185, "timestamp": "2025-09-10 02:31:10.478122", "step": 5111, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.507470", "step": 5111, "epoch": 3 }, { "type": "loss", "content": 0.0028873595874756575, "timestamp": "2025-09-10 02:31:10.533094", "step": 5112, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.562208", "step": 5112, "epoch": 3 }, { "type": "loss", "content": 9.522762411506847e-05, "timestamp": "2025-09-10 02:31:10.564838", "step": 5113, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.593889", "step": 5113, "epoch": 3 }, { "type": "loss", "content": 0.004308689851313829, "timestamp": "2025-09-10 02:31:10.596276", "step": 5114, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:10.626008", "step": 5114, "epoch": 3 }, { "type": "loss", "content": 0.002052604453638196, "timestamp": "2025-09-10 02:31:10.628078", "step": 5115, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.657320", "step": 5115, "epoch": 3 }, { "type": "loss", "content": 0.02458536997437477, "timestamp": "2025-09-10 02:31:10.680984", "step": 5116, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.711612", "step": 5116, "epoch": 3 }, { "type": "loss", "content": 0.023380303755402565, "timestamp": "2025-09-10 02:31:10.713857", "step": 5117, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.754490", "step": 5117, "epoch": 3 }, { "type": "loss", "content": 0.05575161799788475, "timestamp": "2025-09-10 02:31:10.756618", "step": 5118, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.785358", "step": 5118, "epoch": 3 }, { "type": "loss", "content": 0.0006170102278701961, "timestamp": "2025-09-10 02:31:10.788518", "step": 5119, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:10.819088", "step": 5119, "epoch": 3 }, { "type": "loss", "content": 0.021255100145936012, "timestamp": "2025-09-10 02:31:10.842873", "step": 5120, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.873048", "step": 5120, "epoch": 3 }, { "type": "loss", "content": 0.0035779413301497698, "timestamp": "2025-09-10 02:31:10.874938", "step": 5121, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.905833", "step": 5121, "epoch": 3 }, { "type": "loss", "content": 0.0019906642846763134, "timestamp": "2025-09-10 02:31:10.908605", "step": 5122, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:10.937104", "step": 5122, "epoch": 3 }, { "type": "loss", "content": 0.023533621802926064, "timestamp": "2025-09-10 02:31:10.940226", "step": 5123, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:10.968752", "step": 5123, "epoch": 3 }, { "type": "loss", "content": 0.007398799993097782, "timestamp": "2025-09-10 02:31:10.992600", "step": 5124, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:11.022150", "step": 5124, "epoch": 3 }, { "type": "loss", "content": 0.06380916386842728, "timestamp": "2025-09-10 02:31:11.024219", "step": 5125, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.061488", "step": 5125, "epoch": 3 }, { "type": "loss", "content": 0.0009983654599636793, "timestamp": "2025-09-10 02:31:11.063442", "step": 5126, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.093207", "step": 5126, "epoch": 3 }, { "type": "loss", "content": 0.005230212118476629, "timestamp": "2025-09-10 02:31:11.095166", "step": 5127, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.124358", "step": 5127, "epoch": 3 }, { "type": "loss", "content": 0.0027204251382499933, "timestamp": "2025-09-10 02:31:11.149679", "step": 5128, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:11.181224", "step": 5128, "epoch": 3 }, { "type": "loss", "content": 0.004974003881216049, "timestamp": "2025-09-10 02:31:11.183023", "step": 5129, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.212086", "step": 5129, "epoch": 3 }, { "type": "loss", "content": 0.0012775195064023137, "timestamp": "2025-09-10 02:31:11.214239", "step": 5130, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.243816", "step": 5130, "epoch": 3 }, { "type": "loss", "content": 0.001391895697452128, "timestamp": "2025-09-10 02:31:11.252463", "step": 5131, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.282080", "step": 5131, "epoch": 3 }, { "type": "loss", "content": 0.028196966275572777, "timestamp": "2025-09-10 02:31:11.305462", "step": 5132, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.335006", "step": 5132, "epoch": 3 }, { "type": "loss", "content": 0.02282044105231762, "timestamp": "2025-09-10 02:31:11.337981", "step": 5133, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.369711", "step": 5133, "epoch": 3 }, { "type": "loss", "content": 0.014931939542293549, "timestamp": "2025-09-10 02:31:11.371625", "step": 5134, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.404273", "step": 5134, "epoch": 3 }, { "type": "loss", "content": 0.01002415083348751, "timestamp": "2025-09-10 02:31:11.406092", "step": 5135, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.436762", "step": 5135, "epoch": 3 }, { "type": "loss", "content": 0.0012437284458428621, "timestamp": "2025-09-10 02:31:11.460387", "step": 5136, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.492270", "step": 5136, "epoch": 3 }, { "type": "loss", "content": 0.002360876649618149, "timestamp": "2025-09-10 02:31:11.494335", "step": 5137, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:11.523343", "step": 5137, "epoch": 3 }, { "type": "loss", "content": 0.004048882517963648, "timestamp": "2025-09-10 02:31:11.525120", "step": 5138, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.553866", "step": 5138, "epoch": 3 }, { "type": "loss", "content": 0.000911208160687238, "timestamp": "2025-09-10 02:31:11.556320", "step": 5139, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.585475", "step": 5139, "epoch": 3 }, { "type": "loss", "content": 0.0005161958979442716, "timestamp": "2025-09-10 02:31:11.612272", "step": 5140, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.641484", "step": 5140, "epoch": 3 }, { "type": "loss", "content": 0.013862027786672115, "timestamp": "2025-09-10 02:31:11.643450", "step": 5141, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.672326", "step": 5141, "epoch": 3 }, { "type": "loss", "content": 0.004430785309523344, "timestamp": "2025-09-10 02:31:11.674396", "step": 5142, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.703388", "step": 5142, "epoch": 3 }, { "type": "loss", "content": 0.001230349880643189, "timestamp": "2025-09-10 02:31:11.706306", "step": 5143, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.742350", "step": 5143, "epoch": 3 }, { "type": "loss", "content": 0.0032119769603013992, "timestamp": "2025-09-10 02:31:11.766221", "step": 5144, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:11.796389", "step": 5144, "epoch": 3 }, { "type": "loss", "content": 0.00026887355488725007, "timestamp": "2025-09-10 02:31:11.798663", "step": 5145, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.827906", "step": 5145, "epoch": 3 }, { "type": "loss", "content": 0.011979511938989162, "timestamp": "2025-09-10 02:31:11.830033", "step": 5146, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.860555", "step": 5146, "epoch": 3 }, { "type": "loss", "content": 0.020315438508987427, "timestamp": "2025-09-10 02:31:11.862697", "step": 5147, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:11.891342", "step": 5147, "epoch": 3 }, { "type": "loss", "content": 0.004353826399892569, "timestamp": "2025-09-10 02:31:11.914762", "step": 5148, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:11.943945", "step": 5148, "epoch": 3 }, { "type": "loss", "content": 0.0077579873614013195, "timestamp": "2025-09-10 02:31:11.945921", "step": 5149, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:11.974708", "step": 5149, "epoch": 3 }, { "type": "loss", "content": 0.050322677940130234, "timestamp": "2025-09-10 02:31:11.976785", "step": 5150, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.007413", "step": 5150, "epoch": 3 }, { "type": "loss", "content": 0.010600102134048939, "timestamp": "2025-09-10 02:31:12.009507", "step": 5151, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:12.038723", "step": 5151, "epoch": 3 }, { "type": "loss", "content": 0.001150781405158341, "timestamp": "2025-09-10 02:31:12.064258", "step": 5152, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.093636", "step": 5152, "epoch": 3 }, { "type": "loss", "content": 0.015882886946201324, "timestamp": "2025-09-10 02:31:12.095299", "step": 5153, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.124352", "step": 5153, "epoch": 3 }, { "type": "loss", "content": 0.038372162729501724, "timestamp": "2025-09-10 02:31:12.126389", "step": 5154, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.155437", "step": 5154, "epoch": 3 }, { "type": "loss", "content": 0.005249445792287588, "timestamp": "2025-09-10 02:31:12.160743", "step": 5155, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.194367", "step": 5155, "epoch": 3 }, { "type": "loss", "content": 0.01985199749469757, "timestamp": "2025-09-10 02:31:12.225728", "step": 5156, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:12.254612", "step": 5156, "epoch": 3 }, { "type": "loss", "content": 0.0048411195166409016, "timestamp": "2025-09-10 02:31:12.256600", "step": 5157, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.285917", "step": 5157, "epoch": 3 }, { "type": "loss", "content": 0.000420969765400514, "timestamp": "2025-09-10 02:31:12.287665", "step": 5158, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.316673", "step": 5158, "epoch": 3 }, { "type": "loss", "content": 0.0017089421162381768, "timestamp": "2025-09-10 02:31:12.318529", "step": 5159, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.347811", "step": 5159, "epoch": 3 }, { "type": "loss", "content": 0.0075353109277784824, "timestamp": "2025-09-10 02:31:12.371468", "step": 5160, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.402929", "step": 5160, "epoch": 3 }, { "type": "loss", "content": 0.002512335777282715, "timestamp": "2025-09-10 02:31:12.405393", "step": 5161, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.435661", "step": 5161, "epoch": 3 }, { "type": "loss", "content": 0.01650133542716503, "timestamp": "2025-09-10 02:31:12.437789", "step": 5162, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.467321", "step": 5162, "epoch": 3 }, { "type": "loss", "content": 0.0005163501482456923, "timestamp": "2025-09-10 02:31:12.469340", "step": 5163, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.498776", "step": 5163, "epoch": 3 }, { "type": "loss", "content": 0.000299501174595207, "timestamp": "2025-09-10 02:31:12.522170", "step": 5164, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.554587", "step": 5164, "epoch": 3 }, { "type": "loss", "content": 0.0024083254393190145, "timestamp": "2025-09-10 02:31:12.556561", "step": 5165, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.585640", "step": 5165, "epoch": 3 }, { "type": "loss", "content": 0.023579344153404236, "timestamp": "2025-09-10 02:31:12.587683", "step": 5166, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:12.616248", "step": 5166, "epoch": 3 }, { "type": "loss", "content": 0.0071758790872991085, "timestamp": "2025-09-10 02:31:12.618218", "step": 5167, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:12.646592", "step": 5167, "epoch": 3 }, { "type": "loss", "content": 0.013538946397602558, "timestamp": "2025-09-10 02:31:12.670222", "step": 5168, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:31:15.226932", "step": 5168, "epoch": 3 }, { "type": "pplx", "content": 2436733.3643380217, "timestamp": "2025-09-10 02:31:15.228967", "step": 5168, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.260803", "step": 5168, "epoch": 3 }, { "type": "loss", "content": 0.00024558056611567736, "timestamp": "2025-09-10 02:31:15.262899", "step": 5169, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.292171", "step": 5169, "epoch": 3 }, { "type": "loss", "content": 0.0008377385092899203, "timestamp": "2025-09-10 02:31:15.296023", "step": 5170, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.325645", "step": 5170, "epoch": 3 }, { "type": "loss", "content": 0.016198571771383286, "timestamp": "2025-09-10 02:31:15.328746", "step": 5171, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.359603", "step": 5171, "epoch": 3 }, { "type": "loss", "content": 0.018108924850821495, "timestamp": "2025-09-10 02:31:15.384143", "step": 5172, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.413231", "step": 5172, "epoch": 3 }, { "type": "loss", "content": 0.0005432538455352187, "timestamp": "2025-09-10 02:31:15.415305", "step": 5173, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.444223", "step": 5173, "epoch": 3 }, { "type": "loss", "content": 0.0016497739125043154, "timestamp": "2025-09-10 02:31:15.446101", "step": 5174, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.474950", "step": 5174, "epoch": 3 }, { "type": "loss", "content": 0.018692251294851303, "timestamp": "2025-09-10 02:31:15.476870", "step": 5175, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.505903", "step": 5175, "epoch": 3 }, { "type": "loss", "content": 0.0012261820957064629, "timestamp": "2025-09-10 02:31:15.531316", "step": 5176, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.560387", "step": 5176, "epoch": 3 }, { "type": "loss", "content": 0.01112561859190464, "timestamp": "2025-09-10 02:31:15.562448", "step": 5177, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.592180", "step": 5177, "epoch": 3 }, { "type": "loss", "content": 0.012058419175446033, "timestamp": "2025-09-10 02:31:15.594155", "step": 5178, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.624368", "step": 5178, "epoch": 3 }, { "type": "loss", "content": 0.0004496954788919538, "timestamp": "2025-09-10 02:31:15.626189", "step": 5179, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.654933", "step": 5179, "epoch": 3 }, { "type": "loss", "content": 0.0007760432199575007, "timestamp": "2025-09-10 02:31:15.678476", "step": 5180, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:15.708930", "step": 5180, "epoch": 3 }, { "type": "loss", "content": 0.0017544291913509369, "timestamp": "2025-09-10 02:31:15.710981", "step": 5181, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.742898", "step": 5181, "epoch": 3 }, { "type": "loss", "content": 0.016934078186750412, "timestamp": "2025-09-10 02:31:15.744876", "step": 5182, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:15.774106", "step": 5182, "epoch": 3 }, { "type": "loss", "content": 0.0005306123639456928, "timestamp": "2025-09-10 02:31:15.776034", "step": 5183, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.804856", "step": 5183, "epoch": 3 }, { "type": "loss", "content": 0.03523048013448715, "timestamp": "2025-09-10 02:31:15.829350", "step": 5184, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.858363", "step": 5184, "epoch": 3 }, { "type": "loss", "content": 0.00044931474258191884, "timestamp": "2025-09-10 02:31:15.861656", "step": 5185, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:15.893036", "step": 5185, "epoch": 3 }, { "type": "loss", "content": 0.0012647055555135012, "timestamp": "2025-09-10 02:31:15.894987", "step": 5186, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.923710", "step": 5186, "epoch": 3 }, { "type": "loss", "content": 0.00022437133884523064, "timestamp": "2025-09-10 02:31:15.925585", "step": 5187, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:15.953947", "step": 5187, "epoch": 3 }, { "type": "loss", "content": 0.00028958715847693384, "timestamp": "2025-09-10 02:31:15.977244", "step": 5188, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:16.006262", "step": 5188, "epoch": 3 }, { "type": "loss", "content": 0.0008154022507369518, "timestamp": "2025-09-10 02:31:16.013030", "step": 5189, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.045272", "step": 5189, "epoch": 3 }, { "type": "loss", "content": 0.003911999054253101, "timestamp": "2025-09-10 02:31:16.047375", "step": 5190, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.077052", "step": 5190, "epoch": 3 }, { "type": "loss", "content": 0.00020245795894879848, "timestamp": "2025-09-10 02:31:16.079111", "step": 5191, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.107901", "step": 5191, "epoch": 3 }, { "type": "loss", "content": 0.0006830912898294628, "timestamp": "2025-09-10 02:31:16.131569", "step": 5192, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.160397", "step": 5192, "epoch": 3 }, { "type": "loss", "content": 0.000709925836417824, "timestamp": "2025-09-10 02:31:16.162461", "step": 5193, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:16.191699", "step": 5193, "epoch": 3 }, { "type": "loss", "content": 0.008646724745631218, "timestamp": "2025-09-10 02:31:16.193759", "step": 5194, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:16.222866", "step": 5194, "epoch": 3 }, { "type": "loss", "content": 0.00067809788743034, "timestamp": "2025-09-10 02:31:16.224883", "step": 5195, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.253496", "step": 5195, "epoch": 3 }, { "type": "loss", "content": 0.00020860283984802663, "timestamp": "2025-09-10 02:31:16.277025", "step": 5196, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:16.306117", "step": 5196, "epoch": 3 }, { "type": "loss", "content": 0.007869458757340908, "timestamp": "2025-09-10 02:31:16.308061", "step": 5197, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.337096", "step": 5197, "epoch": 3 }, { "type": "loss", "content": 0.0542362816631794, "timestamp": "2025-09-10 02:31:16.339078", "step": 5198, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.367853", "step": 5198, "epoch": 3 }, { "type": "loss", "content": 0.007059518247842789, "timestamp": "2025-09-10 02:31:16.369810", "step": 5199, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.398806", "step": 5199, "epoch": 3 }, { "type": "loss", "content": 0.0003244362014811486, "timestamp": "2025-09-10 02:31:16.422496", "step": 5200, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.452339", "step": 5200, "epoch": 3 }, { "type": "loss", "content": 0.013305050320923328, "timestamp": "2025-09-10 02:31:16.454187", "step": 5201, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.482981", "step": 5201, "epoch": 3 }, { "type": "loss", "content": 0.015022998675704002, "timestamp": "2025-09-10 02:31:16.484794", "step": 5202, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.515009", "step": 5202, "epoch": 3 }, { "type": "loss", "content": 0.0001668601034907624, "timestamp": "2025-09-10 02:31:16.516846", "step": 5203, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:16.551679", "step": 5203, "epoch": 3 }, { "type": "loss", "content": 0.00025806803023442626, "timestamp": "2025-09-10 02:31:16.575165", "step": 5204, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.603895", "step": 5204, "epoch": 3 }, { "type": "loss", "content": 0.013605805113911629, "timestamp": "2025-09-10 02:31:16.605943", "step": 5205, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.635001", "step": 5205, "epoch": 3 }, { "type": "loss", "content": 0.04866912215948105, "timestamp": "2025-09-10 02:31:16.636837", "step": 5206, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.665226", "step": 5206, "epoch": 3 }, { "type": "loss", "content": 0.0006265775300562382, "timestamp": "2025-09-10 02:31:16.667115", "step": 5207, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.696538", "step": 5207, "epoch": 3 }, { "type": "loss", "content": 0.008275379426777363, "timestamp": "2025-09-10 02:31:16.720946", "step": 5208, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.751174", "step": 5208, "epoch": 3 }, { "type": "loss", "content": 0.006404665298759937, "timestamp": "2025-09-10 02:31:16.753020", "step": 5209, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.781809", "step": 5209, "epoch": 3 }, { "type": "loss", "content": 0.053043730556964874, "timestamp": "2025-09-10 02:31:16.789799", "step": 5210, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.819930", "step": 5210, "epoch": 3 }, { "type": "loss", "content": 0.007864592596888542, "timestamp": "2025-09-10 02:31:16.822660", "step": 5211, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:16.852152", "step": 5211, "epoch": 3 }, { "type": "loss", "content": 0.0004012619028799236, "timestamp": "2025-09-10 02:31:16.875610", "step": 5212, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.906105", "step": 5212, "epoch": 3 }, { "type": "loss", "content": 0.00010462481441209093, "timestamp": "2025-09-10 02:31:16.907983", "step": 5213, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.936564", "step": 5213, "epoch": 3 }, { "type": "loss", "content": 0.002815719461068511, "timestamp": "2025-09-10 02:31:16.938555", "step": 5214, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:16.967324", "step": 5214, "epoch": 3 }, { "type": "loss", "content": 0.003537782933562994, "timestamp": "2025-09-10 02:31:16.969160", "step": 5215, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.004696", "step": 5215, "epoch": 3 }, { "type": "loss", "content": 0.005013471934944391, "timestamp": "2025-09-10 02:31:17.028151", "step": 5216, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:17.057071", "step": 5216, "epoch": 3 }, { "type": "loss", "content": 0.0006305575370788574, "timestamp": "2025-09-10 02:31:17.059579", "step": 5217, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.090166", "step": 5217, "epoch": 3 }, { "type": "loss", "content": 0.015557162463665009, "timestamp": "2025-09-10 02:31:17.092141", "step": 5218, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.122354", "step": 5218, "epoch": 3 }, { "type": "loss", "content": 0.005185294430702925, "timestamp": "2025-09-10 02:31:17.124241", "step": 5219, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.153459", "step": 5219, "epoch": 3 }, { "type": "loss", "content": 0.0010910272831097245, "timestamp": "2025-09-10 02:31:17.179637", "step": 5220, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.209356", "step": 5220, "epoch": 3 }, { "type": "loss", "content": 0.00012062443420290947, "timestamp": "2025-09-10 02:31:17.211365", "step": 5221, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.245458", "step": 5221, "epoch": 3 }, { "type": "loss", "content": 0.005589938256889582, "timestamp": "2025-09-10 02:31:17.247404", "step": 5222, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.275962", "step": 5222, "epoch": 3 }, { "type": "loss", "content": 0.01552930474281311, "timestamp": "2025-09-10 02:31:17.283244", "step": 5223, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.315771", "step": 5223, "epoch": 3 }, { "type": "loss", "content": 0.0014602139126509428, "timestamp": "2025-09-10 02:31:17.339146", "step": 5224, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.373490", "step": 5224, "epoch": 3 }, { "type": "loss", "content": 0.0019929243717342615, "timestamp": "2025-09-10 02:31:17.375354", "step": 5225, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.404301", "step": 5225, "epoch": 3 }, { "type": "loss", "content": 0.007471051067113876, "timestamp": "2025-09-10 02:31:17.406404", "step": 5226, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:17.435291", "step": 5226, "epoch": 3 }, { "type": "loss", "content": 0.003584515769034624, "timestamp": "2025-09-10 02:31:17.437216", "step": 5227, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.465910", "step": 5227, "epoch": 3 }, { "type": "loss", "content": 0.007189448922872543, "timestamp": "2025-09-10 02:31:17.490065", "step": 5228, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.519098", "step": 5228, "epoch": 3 }, { "type": "loss", "content": 0.0427686981856823, "timestamp": "2025-09-10 02:31:17.521138", "step": 5229, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.553698", "step": 5229, "epoch": 3 }, { "type": "loss", "content": 0.0003127622476313263, "timestamp": "2025-09-10 02:31:17.555539", "step": 5230, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.584885", "step": 5230, "epoch": 3 }, { "type": "loss", "content": 0.00016666494775563478, "timestamp": "2025-09-10 02:31:17.588150", "step": 5231, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.620373", "step": 5231, "epoch": 3 }, { "type": "loss", "content": 0.00034709740430116653, "timestamp": "2025-09-10 02:31:17.643883", "step": 5232, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.673210", "step": 5232, "epoch": 3 }, { "type": "loss", "content": 0.0011999139096587896, "timestamp": "2025-09-10 02:31:17.679007", "step": 5233, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.710817", "step": 5233, "epoch": 3 }, { "type": "loss", "content": 0.004059411119669676, "timestamp": "2025-09-10 02:31:17.712955", "step": 5234, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:17.741992", "step": 5234, "epoch": 3 }, { "type": "loss", "content": 4.660410922951996e-05, "timestamp": "2025-09-10 02:31:17.745814", "step": 5235, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.776182", "step": 5235, "epoch": 3 }, { "type": "loss", "content": 0.0003501230094116181, "timestamp": "2025-09-10 02:31:17.800536", "step": 5236, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.840391", "step": 5236, "epoch": 3 }, { "type": "loss", "content": 0.004818799439817667, "timestamp": "2025-09-10 02:31:17.843662", "step": 5237, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.877827", "step": 5237, "epoch": 3 }, { "type": "loss", "content": 0.007570463698357344, "timestamp": "2025-09-10 02:31:17.881350", "step": 5238, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.913806", "step": 5238, "epoch": 3 }, { "type": "loss", "content": 0.0011491699842736125, "timestamp": "2025-09-10 02:31:17.918251", "step": 5239, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:17.957604", "step": 5239, "epoch": 3 }, { "type": "loss", "content": 0.0004111050220672041, "timestamp": "2025-09-10 02:31:17.981099", "step": 5240, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:18.010200", "step": 5240, "epoch": 3 }, { "type": "loss", "content": 0.0013137703062966466, "timestamp": "2025-09-10 02:31:18.015201", "step": 5241, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.047185", "step": 5241, "epoch": 3 }, { "type": "loss", "content": 0.008169819600880146, "timestamp": "2025-09-10 02:31:18.049166", "step": 5242, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.080079", "step": 5242, "epoch": 3 }, { "type": "loss", "content": 0.02304341271519661, "timestamp": "2025-09-10 02:31:18.082139", "step": 5243, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.111525", "step": 5243, "epoch": 3 }, { "type": "loss", "content": 0.0034230987075716257, "timestamp": "2025-09-10 02:31:18.135008", "step": 5244, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.168534", "step": 5244, "epoch": 3 }, { "type": "loss", "content": 7.703209848841652e-05, "timestamp": "2025-09-10 02:31:18.170437", "step": 5245, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.199407", "step": 5245, "epoch": 3 }, { "type": "loss", "content": 0.00011362635996192694, "timestamp": "2025-09-10 02:31:18.201336", "step": 5246, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.231155", "step": 5246, "epoch": 3 }, { "type": "loss", "content": 0.0002412385365460068, "timestamp": "2025-09-10 02:31:18.233207", "step": 5247, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.262069", "step": 5247, "epoch": 3 }, { "type": "loss", "content": 0.003811098635196686, "timestamp": "2025-09-10 02:31:18.285668", "step": 5248, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.314575", "step": 5248, "epoch": 3 }, { "type": "loss", "content": 0.0002703253994695842, "timestamp": "2025-09-10 02:31:18.317087", "step": 5249, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.345929", "step": 5249, "epoch": 3 }, { "type": "loss", "content": 0.00048442385741509497, "timestamp": "2025-09-10 02:31:18.349944", "step": 5250, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.378979", "step": 5250, "epoch": 3 }, { "type": "loss", "content": 0.00034568950650282204, "timestamp": "2025-09-10 02:31:18.380940", "step": 5251, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.409402", "step": 5251, "epoch": 3 }, { "type": "loss", "content": 0.00010054224549094215, "timestamp": "2025-09-10 02:31:18.432673", "step": 5252, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.464971", "step": 5252, "epoch": 3 }, { "type": "loss", "content": 0.002652938012033701, "timestamp": "2025-09-10 02:31:18.466848", "step": 5253, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.495717", "step": 5253, "epoch": 3 }, { "type": "loss", "content": 0.00024533795658499, "timestamp": "2025-09-10 02:31:18.497892", "step": 5254, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.530480", "step": 5254, "epoch": 3 }, { "type": "loss", "content": 0.00039591133827343583, "timestamp": "2025-09-10 02:31:18.532446", "step": 5255, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.561769", "step": 5255, "epoch": 3 }, { "type": "loss", "content": 0.0004582552064675838, "timestamp": "2025-09-10 02:31:18.585207", "step": 5256, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:18.614450", "step": 5256, "epoch": 3 }, { "type": "loss", "content": 0.002351920586079359, "timestamp": "2025-09-10 02:31:18.616447", "step": 5257, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.645554", "step": 5257, "epoch": 3 }, { "type": "loss", "content": 0.0006441808654926717, "timestamp": "2025-09-10 02:31:18.647553", "step": 5258, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.676357", "step": 5258, "epoch": 3 }, { "type": "loss", "content": 0.0015084192855283618, "timestamp": "2025-09-10 02:31:18.678260", "step": 5259, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:18.707329", "step": 5259, "epoch": 3 }, { "type": "loss", "content": 0.007480281870812178, "timestamp": "2025-09-10 02:31:18.730777", "step": 5260, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:18.765026", "step": 5260, "epoch": 3 }, { "type": "loss", "content": 0.0005572711233980954, "timestamp": "2025-09-10 02:31:18.767053", "step": 5261, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:18.795958", "step": 5261, "epoch": 3 }, { "type": "loss", "content": 0.00013719707203563303, "timestamp": "2025-09-10 02:31:18.797997", "step": 5262, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.827324", "step": 5262, "epoch": 3 }, { "type": "loss", "content": 0.002512450562790036, "timestamp": "2025-09-10 02:31:18.830934", "step": 5263, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:18.863313", "step": 5263, "epoch": 3 }, { "type": "loss", "content": 0.0021838522516191006, "timestamp": "2025-09-10 02:31:18.888710", "step": 5264, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:18.918165", "step": 5264, "epoch": 3 }, { "type": "loss", "content": 0.0029657420236617327, "timestamp": "2025-09-10 02:31:18.920329", "step": 5265, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:18.953243", "step": 5265, "epoch": 3 }, { "type": "loss", "content": 0.00030763083486817777, "timestamp": "2025-09-10 02:31:18.956482", "step": 5266, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:18.986491", "step": 5266, "epoch": 3 }, { "type": "loss", "content": 0.004049286246299744, "timestamp": "2025-09-10 02:31:18.988433", "step": 5267, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.017144", "step": 5267, "epoch": 3 }, { "type": "loss", "content": 7.353690307354555e-05, "timestamp": "2025-09-10 02:31:19.041754", "step": 5268, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.070961", "step": 5268, "epoch": 3 }, { "type": "loss", "content": 7.854583964217454e-05, "timestamp": "2025-09-10 02:31:19.073049", "step": 5269, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.102430", "step": 5269, "epoch": 3 }, { "type": "loss", "content": 0.0026422517839819193, "timestamp": "2025-09-10 02:31:19.104551", "step": 5270, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.133378", "step": 5270, "epoch": 3 }, { "type": "loss", "content": 0.014817780815064907, "timestamp": "2025-09-10 02:31:19.135324", "step": 5271, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.164117", "step": 5271, "epoch": 3 }, { "type": "loss", "content": 0.001545860548503697, "timestamp": "2025-09-10 02:31:19.187985", "step": 5272, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.217188", "step": 5272, "epoch": 3 }, { "type": "loss", "content": 0.0019337693229317665, "timestamp": "2025-09-10 02:31:19.219435", "step": 5273, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.248426", "step": 5273, "epoch": 3 }, { "type": "loss", "content": 0.00020618803682737052, "timestamp": "2025-09-10 02:31:19.250457", "step": 5274, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.279890", "step": 5274, "epoch": 3 }, { "type": "loss", "content": 0.00018796950462274253, "timestamp": "2025-09-10 02:31:19.282053", "step": 5275, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.311019", "step": 5275, "epoch": 3 }, { "type": "loss", "content": 0.0075276815332472324, "timestamp": "2025-09-10 02:31:19.334589", "step": 5276, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:31:19.364301", "step": 5276, "epoch": 3 }, { "type": "loss", "content": 0.017516721040010452, "timestamp": "2025-09-10 02:31:19.366156", "step": 5277, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.394787", "step": 5277, "epoch": 3 }, { "type": "loss", "content": 0.00011237651779083535, "timestamp": "2025-09-10 02:31:19.396829", "step": 5278, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.425818", "step": 5278, "epoch": 3 }, { "type": "loss", "content": 0.00078305829083547, "timestamp": "2025-09-10 02:31:19.427429", "step": 5279, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.456639", "step": 5279, "epoch": 3 }, { "type": "loss", "content": 0.01742679253220558, "timestamp": "2025-09-10 02:31:19.481385", "step": 5280, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.510918", "step": 5280, "epoch": 3 }, { "type": "loss", "content": 0.006091821938753128, "timestamp": "2025-09-10 02:31:19.512717", "step": 5281, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.541618", "step": 5281, "epoch": 3 }, { "type": "loss", "content": 0.01619417779147625, "timestamp": "2025-09-10 02:31:19.543826", "step": 5282, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:19.572648", "step": 5282, "epoch": 3 }, { "type": "loss", "content": 0.02908501960337162, "timestamp": "2025-09-10 02:31:19.574463", "step": 5283, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.603516", "step": 5283, "epoch": 3 }, { "type": "loss", "content": 0.0011206221533939242, "timestamp": "2025-09-10 02:31:19.627157", "step": 5284, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:19.656075", "step": 5284, "epoch": 3 }, { "type": "loss", "content": 7.628348976140842e-05, "timestamp": "2025-09-10 02:31:19.658185", "step": 5285, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.687559", "step": 5285, "epoch": 3 }, { "type": "loss", "content": 7.452488353010267e-05, "timestamp": "2025-09-10 02:31:19.691340", "step": 5286, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:19.720388", "step": 5286, "epoch": 3 }, { "type": "loss", "content": 0.01618286408483982, "timestamp": "2025-09-10 02:31:19.722314", "step": 5287, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.753329", "step": 5287, "epoch": 3 }, { "type": "loss", "content": 0.00011036908108508214, "timestamp": "2025-09-10 02:31:19.776841", "step": 5288, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.811691", "step": 5288, "epoch": 3 }, { "type": "loss", "content": 0.005474313162267208, "timestamp": "2025-09-10 02:31:19.813569", "step": 5289, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.842327", "step": 5289, "epoch": 3 }, { "type": "loss", "content": 0.0004412243433762342, "timestamp": "2025-09-10 02:31:19.848291", "step": 5290, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.880027", "step": 5290, "epoch": 3 }, { "type": "loss", "content": 0.00018016780086327344, "timestamp": "2025-09-10 02:31:19.882325", "step": 5291, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:19.912422", "step": 5291, "epoch": 3 }, { "type": "loss", "content": 0.02032560668885708, "timestamp": "2025-09-10 02:31:19.936006", "step": 5292, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:19.966711", "step": 5292, "epoch": 3 }, { "type": "loss", "content": 0.00012549829261843115, "timestamp": "2025-09-10 02:31:19.968835", "step": 5293, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:19.999016", "step": 5293, "epoch": 3 }, { "type": "loss", "content": 0.0002709024411160499, "timestamp": "2025-09-10 02:31:20.001308", "step": 5294, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.030992", "step": 5294, "epoch": 3 }, { "type": "loss", "content": 0.033939070999622345, "timestamp": "2025-09-10 02:31:20.033124", "step": 5295, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.062470", "step": 5295, "epoch": 3 }, { "type": "loss", "content": 0.04416469484567642, "timestamp": "2025-09-10 02:31:20.086154", "step": 5296, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.115566", "step": 5296, "epoch": 3 }, { "type": "loss", "content": 0.00032265594927594066, "timestamp": "2025-09-10 02:31:20.121141", "step": 5297, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.160260", "step": 5297, "epoch": 3 }, { "type": "loss", "content": 0.005029712338000536, "timestamp": "2025-09-10 02:31:20.162311", "step": 5298, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:20.193516", "step": 5298, "epoch": 3 }, { "type": "loss", "content": 0.009289773181080818, "timestamp": "2025-09-10 02:31:20.197798", "step": 5299, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.235325", "step": 5299, "epoch": 3 }, { "type": "loss", "content": 8.617424464318901e-05, "timestamp": "2025-09-10 02:31:20.259118", "step": 5300, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.288455", "step": 5300, "epoch": 3 }, { "type": "loss", "content": 0.0001644386356929317, "timestamp": "2025-09-10 02:31:20.290912", "step": 5301, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.322936", "step": 5301, "epoch": 3 }, { "type": "loss", "content": 8.05677191237919e-05, "timestamp": "2025-09-10 02:31:20.325139", "step": 5302, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.355612", "step": 5302, "epoch": 3 }, { "type": "loss", "content": 0.05833953246474266, "timestamp": "2025-09-10 02:31:20.367871", "step": 5303, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.401700", "step": 5303, "epoch": 3 }, { "type": "loss", "content": 0.0016823047772049904, "timestamp": "2025-09-10 02:31:20.425730", "step": 5304, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.454809", "step": 5304, "epoch": 3 }, { "type": "loss", "content": 0.0006739491946063936, "timestamp": "2025-09-10 02:31:20.456751", "step": 5305, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.487365", "step": 5305, "epoch": 3 }, { "type": "loss", "content": 0.001487448113039136, "timestamp": "2025-09-10 02:31:20.490804", "step": 5306, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:20.519659", "step": 5306, "epoch": 3 }, { "type": "loss", "content": 0.006275292951613665, "timestamp": "2025-09-10 02:31:20.521833", "step": 5307, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.551153", "step": 5307, "epoch": 3 }, { "type": "loss", "content": 0.00020399487402755767, "timestamp": "2025-09-10 02:31:20.574851", "step": 5308, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.609004", "step": 5308, "epoch": 3 }, { "type": "loss", "content": 7.673249638173729e-05, "timestamp": "2025-09-10 02:31:20.610806", "step": 5309, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.639468", "step": 5309, "epoch": 3 }, { "type": "loss", "content": 0.0006607953691855073, "timestamp": "2025-09-10 02:31:20.641553", "step": 5310, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.670452", "step": 5310, "epoch": 3 }, { "type": "loss", "content": 0.000300820596748963, "timestamp": "2025-09-10 02:31:20.672538", "step": 5311, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.701310", "step": 5311, "epoch": 3 }, { "type": "loss", "content": 0.00024385188589803874, "timestamp": "2025-09-10 02:31:20.724949", "step": 5312, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.754340", "step": 5312, "epoch": 3 }, { "type": "loss", "content": 0.01962227001786232, "timestamp": "2025-09-10 02:31:20.757731", "step": 5313, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.789592", "step": 5313, "epoch": 3 }, { "type": "loss", "content": 0.0009850615169852972, "timestamp": "2025-09-10 02:31:20.791832", "step": 5314, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.823986", "step": 5314, "epoch": 3 }, { "type": "loss", "content": 0.0003341401170473546, "timestamp": "2025-09-10 02:31:20.826172", "step": 5315, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:20.855012", "step": 5315, "epoch": 3 }, { "type": "loss", "content": 0.005033438093960285, "timestamp": "2025-09-10 02:31:20.880170", "step": 5316, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.910178", "step": 5316, "epoch": 3 }, { "type": "loss", "content": 0.013828927651047707, "timestamp": "2025-09-10 02:31:20.912251", "step": 5317, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:20.942083", "step": 5317, "epoch": 3 }, { "type": "loss", "content": 0.0004040444327984005, "timestamp": "2025-09-10 02:31:20.944122", "step": 5318, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:20.976867", "step": 5318, "epoch": 3 }, { "type": "loss", "content": 0.00015366276784334332, "timestamp": "2025-09-10 02:31:20.979054", "step": 5319, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:21.007707", "step": 5319, "epoch": 3 }, { "type": "loss", "content": 0.0336005724966526, "timestamp": "2025-09-10 02:31:21.031112", "step": 5320, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:31:23.033008", "step": 5320, "epoch": 3 }, { "type": "pplx", "content": 2551079.3306668666, "timestamp": "2025-09-10 02:31:23.035009", "step": 5320, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:23.062383", "step": 5320, "epoch": 3 }, { "type": "loss", "content": 0.005831195507198572, "timestamp": "2025-09-10 02:31:23.064508", "step": 5321, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.093977", "step": 5321, "epoch": 3 }, { "type": "loss", "content": 0.002231104066595435, "timestamp": "2025-09-10 02:31:23.098600", "step": 5322, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.135118", "step": 5322, "epoch": 3 }, { "type": "loss", "content": 0.0038797142915427685, "timestamp": "2025-09-10 02:31:23.137440", "step": 5323, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.166399", "step": 5323, "epoch": 3 }, { "type": "loss", "content": 0.009675068780779839, "timestamp": "2025-09-10 02:31:23.190502", "step": 5324, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.220054", "step": 5324, "epoch": 3 }, { "type": "loss", "content": 0.017302149906754494, "timestamp": "2025-09-10 02:31:23.225215", "step": 5325, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.256571", "step": 5325, "epoch": 3 }, { "type": "loss", "content": 0.0034467519726604223, "timestamp": "2025-09-10 02:31:23.259000", "step": 5326, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:23.288559", "step": 5326, "epoch": 3 }, { "type": "loss", "content": 0.00029501141398213804, "timestamp": "2025-09-10 02:31:23.290692", "step": 5327, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.319613", "step": 5327, "epoch": 3 }, { "type": "loss", "content": 0.05141686275601387, "timestamp": "2025-09-10 02:31:23.343802", "step": 5328, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.378494", "step": 5328, "epoch": 3 }, { "type": "loss", "content": 0.0002961040590889752, "timestamp": "2025-09-10 02:31:23.380572", "step": 5329, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.413316", "step": 5329, "epoch": 3 }, { "type": "loss", "content": 0.0018813759088516235, "timestamp": "2025-09-10 02:31:23.415832", "step": 5330, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:23.444493", "step": 5330, "epoch": 3 }, { "type": "loss", "content": 0.0022770597133785486, "timestamp": "2025-09-10 02:31:23.446223", "step": 5331, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.479983", "step": 5331, "epoch": 3 }, { "type": "loss", "content": 0.016446303576231003, "timestamp": "2025-09-10 02:31:23.506221", "step": 5332, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.535570", "step": 5332, "epoch": 3 }, { "type": "loss", "content": 0.0007620741962455213, "timestamp": "2025-09-10 02:31:23.540073", "step": 5333, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.570257", "step": 5333, "epoch": 3 }, { "type": "loss", "content": 0.0005776284378953278, "timestamp": "2025-09-10 02:31:23.572491", "step": 5334, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.606212", "step": 5334, "epoch": 3 }, { "type": "loss", "content": 0.017588842660188675, "timestamp": "2025-09-10 02:31:23.608382", "step": 5335, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.642529", "step": 5335, "epoch": 3 }, { "type": "loss", "content": 0.00035538533120416105, "timestamp": "2025-09-10 02:31:23.666123", "step": 5336, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.704544", "step": 5336, "epoch": 3 }, { "type": "loss", "content": 0.019263219088315964, "timestamp": "2025-09-10 02:31:23.706683", "step": 5337, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.745640", "step": 5337, "epoch": 3 }, { "type": "loss", "content": 0.04862800985574722, "timestamp": "2025-09-10 02:31:23.748205", "step": 5338, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.781046", "step": 5338, "epoch": 3 }, { "type": "loss", "content": 0.002783591626212001, "timestamp": "2025-09-10 02:31:23.783018", "step": 5339, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:23.812930", "step": 5339, "epoch": 3 }, { "type": "loss", "content": 0.0006132293492555618, "timestamp": "2025-09-10 02:31:23.837116", "step": 5340, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.867392", "step": 5340, "epoch": 3 }, { "type": "loss", "content": 0.0003858158888760954, "timestamp": "2025-09-10 02:31:23.869425", "step": 5341, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.898673", "step": 5341, "epoch": 3 }, { "type": "loss", "content": 0.0003180743078701198, "timestamp": "2025-09-10 02:31:23.900788", "step": 5342, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.929420", "step": 5342, "epoch": 3 }, { "type": "loss", "content": 0.012362735345959663, "timestamp": "2025-09-10 02:31:23.931464", "step": 5343, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:23.960404", "step": 5343, "epoch": 3 }, { "type": "loss", "content": 0.006400517653673887, "timestamp": "2025-09-10 02:31:23.984040", "step": 5344, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.014733", "step": 5344, "epoch": 3 }, { "type": "loss", "content": 0.0027917451225221157, "timestamp": "2025-09-10 02:31:24.017888", "step": 5345, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.048798", "step": 5345, "epoch": 3 }, { "type": "loss", "content": 0.013821698725223541, "timestamp": "2025-09-10 02:31:24.050810", "step": 5346, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.084439", "step": 5346, "epoch": 3 }, { "type": "loss", "content": 0.002162518445402384, "timestamp": "2025-09-10 02:31:24.086651", "step": 5347, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.116500", "step": 5347, "epoch": 3 }, { "type": "loss", "content": 0.00018192424613516778, "timestamp": "2025-09-10 02:31:24.141334", "step": 5348, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.170338", "step": 5348, "epoch": 3 }, { "type": "loss", "content": 0.0048463004641234875, "timestamp": "2025-09-10 02:31:24.173256", "step": 5349, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.203038", "step": 5349, "epoch": 3 }, { "type": "loss", "content": 0.003790907561779022, "timestamp": "2025-09-10 02:31:24.224082", "step": 5350, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.254891", "step": 5350, "epoch": 3 }, { "type": "loss", "content": 0.00026786929811351, "timestamp": "2025-09-10 02:31:24.258520", "step": 5351, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:24.287612", "step": 5351, "epoch": 3 }, { "type": "loss", "content": 0.028449947014451027, "timestamp": "2025-09-10 02:31:24.312694", "step": 5352, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.359701", "step": 5352, "epoch": 3 }, { "type": "loss", "content": 0.006347959395498037, "timestamp": "2025-09-10 02:31:24.361584", "step": 5353, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.395475", "step": 5353, "epoch": 3 }, { "type": "loss", "content": 0.010898602195084095, "timestamp": "2025-09-10 02:31:24.397680", "step": 5354, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.426488", "step": 5354, "epoch": 3 }, { "type": "loss", "content": 0.0015070537338033319, "timestamp": "2025-09-10 02:31:24.428418", "step": 5355, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.457631", "step": 5355, "epoch": 3 }, { "type": "loss", "content": 0.0019601122476160526, "timestamp": "2025-09-10 02:31:24.481157", "step": 5356, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.510438", "step": 5356, "epoch": 3 }, { "type": "loss", "content": 0.002505767857655883, "timestamp": "2025-09-10 02:31:24.515852", "step": 5357, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.544584", "step": 5357, "epoch": 3 }, { "type": "loss", "content": 0.04163316637277603, "timestamp": "2025-09-10 02:31:24.546994", "step": 5358, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.578752", "step": 5358, "epoch": 3 }, { "type": "loss", "content": 0.011395329609513283, "timestamp": "2025-09-10 02:31:24.580942", "step": 5359, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.614969", "step": 5359, "epoch": 3 }, { "type": "loss", "content": 0.0008985429303720593, "timestamp": "2025-09-10 02:31:24.639625", "step": 5360, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.668878", "step": 5360, "epoch": 3 }, { "type": "loss", "content": 0.028808802366256714, "timestamp": "2025-09-10 02:31:24.672278", "step": 5361, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.701338", "step": 5361, "epoch": 3 }, { "type": "loss", "content": 0.0006094464915804565, "timestamp": "2025-09-10 02:31:24.703520", "step": 5362, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.738751", "step": 5362, "epoch": 3 }, { "type": "loss", "content": 0.006529428996145725, "timestamp": "2025-09-10 02:31:24.740914", "step": 5363, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.785758", "step": 5363, "epoch": 3 }, { "type": "loss", "content": 0.004220775328576565, "timestamp": "2025-09-10 02:31:24.809807", "step": 5364, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.839278", "step": 5364, "epoch": 3 }, { "type": "loss", "content": 0.04075964167714119, "timestamp": "2025-09-10 02:31:24.842825", "step": 5365, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.876161", "step": 5365, "epoch": 3 }, { "type": "loss", "content": 0.009647761471569538, "timestamp": "2025-09-10 02:31:24.882712", "step": 5366, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.912694", "step": 5366, "epoch": 3 }, { "type": "loss", "content": 0.0018993124831467867, "timestamp": "2025-09-10 02:31:24.915139", "step": 5367, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:24.944155", "step": 5367, "epoch": 3 }, { "type": "loss", "content": 0.0010295561514794827, "timestamp": "2025-09-10 02:31:24.967567", "step": 5368, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:24.998242", "step": 5368, "epoch": 3 }, { "type": "loss", "content": 0.0020003027748316526, "timestamp": "2025-09-10 02:31:25.002331", "step": 5369, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.040875", "step": 5369, "epoch": 3 }, { "type": "loss", "content": 0.0005581422592513263, "timestamp": "2025-09-10 02:31:25.044665", "step": 5370, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.074439", "step": 5370, "epoch": 3 }, { "type": "loss", "content": 0.0026471889577805996, "timestamp": "2025-09-10 02:31:25.076301", "step": 5371, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.108638", "step": 5371, "epoch": 3 }, { "type": "loss", "content": 0.023278558626770973, "timestamp": "2025-09-10 02:31:25.133488", "step": 5372, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.163686", "step": 5372, "epoch": 3 }, { "type": "loss", "content": 0.000599853228777647, "timestamp": "2025-09-10 02:31:25.165653", "step": 5373, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.194542", "step": 5373, "epoch": 3 }, { "type": "loss", "content": 0.0011137856636196375, "timestamp": "2025-09-10 02:31:25.197585", "step": 5374, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.228242", "step": 5374, "epoch": 3 }, { "type": "loss", "content": 0.014667009934782982, "timestamp": "2025-09-10 02:31:25.230258", "step": 5375, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:25.261129", "step": 5375, "epoch": 3 }, { "type": "loss", "content": 0.008366279304027557, "timestamp": "2025-09-10 02:31:25.288421", "step": 5376, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:25.317526", "step": 5376, "epoch": 3 }, { "type": "loss", "content": 0.0007964317337609828, "timestamp": "2025-09-10 02:31:25.319657", "step": 5377, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.364801", "step": 5377, "epoch": 3 }, { "type": "loss", "content": 0.009769702330231667, "timestamp": "2025-09-10 02:31:25.366897", "step": 5378, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.398327", "step": 5378, "epoch": 3 }, { "type": "loss", "content": 0.0003478755825199187, "timestamp": "2025-09-10 02:31:25.401384", "step": 5379, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.436273", "step": 5379, "epoch": 3 }, { "type": "loss", "content": 0.005425657145678997, "timestamp": "2025-09-10 02:31:25.463427", "step": 5380, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.498521", "step": 5380, "epoch": 3 }, { "type": "loss", "content": 0.006446263287216425, "timestamp": "2025-09-10 02:31:25.505037", "step": 5381, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:25.537189", "step": 5381, "epoch": 3 }, { "type": "loss", "content": 0.0004652982752304524, "timestamp": "2025-09-10 02:31:25.539203", "step": 5382, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:25.569171", "step": 5382, "epoch": 3 }, { "type": "loss", "content": 0.0002952113572973758, "timestamp": "2025-09-10 02:31:25.570955", "step": 5383, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:25.600426", "step": 5383, "epoch": 3 }, { "type": "loss", "content": 0.0017549424665048718, "timestamp": "2025-09-10 02:31:25.625726", "step": 5384, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.655258", "step": 5384, "epoch": 3 }, { "type": "loss", "content": 0.003002666402608156, "timestamp": "2025-09-10 02:31:25.657887", "step": 5385, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:25.691141", "step": 5385, "epoch": 3 }, { "type": "loss", "content": 0.010298574343323708, "timestamp": "2025-09-10 02:31:25.693059", "step": 5386, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.724195", "step": 5386, "epoch": 3 }, { "type": "loss", "content": 0.00422916142269969, "timestamp": "2025-09-10 02:31:25.726089", "step": 5387, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.759233", "step": 5387, "epoch": 3 }, { "type": "loss", "content": 0.0069030639715492725, "timestamp": "2025-09-10 02:31:25.782721", "step": 5388, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.811771", "step": 5388, "epoch": 3 }, { "type": "loss", "content": 0.0004885837552137673, "timestamp": "2025-09-10 02:31:25.813703", "step": 5389, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.845112", "step": 5389, "epoch": 3 }, { "type": "loss", "content": 0.011209973134100437, "timestamp": "2025-09-10 02:31:25.847313", "step": 5390, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.883635", "step": 5390, "epoch": 3 }, { "type": "loss", "content": 0.00047788023948669434, "timestamp": "2025-09-10 02:31:25.886748", "step": 5391, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:25.922538", "step": 5391, "epoch": 3 }, { "type": "loss", "content": 0.0013479841873049736, "timestamp": "2025-09-10 02:31:25.946145", "step": 5392, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:25.976653", "step": 5392, "epoch": 3 }, { "type": "loss", "content": 0.0030322824604809284, "timestamp": "2025-09-10 02:31:25.979194", "step": 5393, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.010820", "step": 5393, "epoch": 3 }, { "type": "loss", "content": 0.0008384265820495784, "timestamp": "2025-09-10 02:31:26.013324", "step": 5394, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.043891", "step": 5394, "epoch": 3 }, { "type": "loss", "content": 0.0038477268535643816, "timestamp": "2025-09-10 02:31:26.045876", "step": 5395, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.075628", "step": 5395, "epoch": 3 }, { "type": "loss", "content": 0.005468371324241161, "timestamp": "2025-09-10 02:31:26.100339", "step": 5396, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.128968", "step": 5396, "epoch": 3 }, { "type": "loss", "content": 0.0058067962527275085, "timestamp": "2025-09-10 02:31:26.131334", "step": 5397, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.160638", "step": 5397, "epoch": 3 }, { "type": "loss", "content": 0.009803896769881248, "timestamp": "2025-09-10 02:31:26.162433", "step": 5398, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.191702", "step": 5398, "epoch": 3 }, { "type": "loss", "content": 0.0013640226097777486, "timestamp": "2025-09-10 02:31:26.198297", "step": 5399, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.232493", "step": 5399, "epoch": 3 }, { "type": "loss", "content": 0.00018000802083406597, "timestamp": "2025-09-10 02:31:26.255848", "step": 5400, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.288083", "step": 5400, "epoch": 3 }, { "type": "loss", "content": 0.01599857583642006, "timestamp": "2025-09-10 02:31:26.290038", "step": 5401, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:26.318620", "step": 5401, "epoch": 3 }, { "type": "loss", "content": 0.013749159872531891, "timestamp": "2025-09-10 02:31:26.320767", "step": 5402, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:26.351926", "step": 5402, "epoch": 3 }, { "type": "loss", "content": 0.0023441340308636427, "timestamp": "2025-09-10 02:31:26.355992", "step": 5403, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.385630", "step": 5403, "epoch": 3 }, { "type": "loss", "content": 0.001614662236534059, "timestamp": "2025-09-10 02:31:26.411454", "step": 5404, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:26.440668", "step": 5404, "epoch": 3 }, { "type": "loss", "content": 0.001221572863869369, "timestamp": "2025-09-10 02:31:26.443952", "step": 5405, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.473381", "step": 5405, "epoch": 3 }, { "type": "loss", "content": 0.0012589620891958475, "timestamp": "2025-09-10 02:31:26.475347", "step": 5406, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.504461", "step": 5406, "epoch": 3 }, { "type": "loss", "content": 0.005226811859756708, "timestamp": "2025-09-10 02:31:26.508840", "step": 5407, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.538014", "step": 5407, "epoch": 3 }, { "type": "loss", "content": 0.0004326138296164572, "timestamp": "2025-09-10 02:31:26.561660", "step": 5408, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.597166", "step": 5408, "epoch": 3 }, { "type": "loss", "content": 0.0002521525020711124, "timestamp": "2025-09-10 02:31:26.599529", "step": 5409, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.629630", "step": 5409, "epoch": 3 }, { "type": "loss", "content": 0.0007950930739752948, "timestamp": "2025-09-10 02:31:26.632036", "step": 5410, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.661393", "step": 5410, "epoch": 3 }, { "type": "loss", "content": 0.00030989127117209136, "timestamp": "2025-09-10 02:31:26.663523", "step": 5411, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.692967", "step": 5411, "epoch": 3 }, { "type": "loss", "content": 0.0004943537642247975, "timestamp": "2025-09-10 02:31:26.717318", "step": 5412, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.747413", "step": 5412, "epoch": 3 }, { "type": "loss", "content": 0.0009193782461807132, "timestamp": "2025-09-10 02:31:26.753106", "step": 5413, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:26.783751", "step": 5413, "epoch": 3 }, { "type": "loss", "content": 0.03788406029343605, "timestamp": "2025-09-10 02:31:26.785876", "step": 5414, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.823038", "step": 5414, "epoch": 3 }, { "type": "loss", "content": 0.0005143205635249615, "timestamp": "2025-09-10 02:31:26.826943", "step": 5415, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.857698", "step": 5415, "epoch": 3 }, { "type": "loss", "content": 0.00025871643447317183, "timestamp": "2025-09-10 02:31:26.881250", "step": 5416, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.911551", "step": 5416, "epoch": 3 }, { "type": "loss", "content": 0.003082282841205597, "timestamp": "2025-09-10 02:31:26.913811", "step": 5417, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.950935", "step": 5417, "epoch": 3 }, { "type": "loss", "content": 0.0006056904676370323, "timestamp": "2025-09-10 02:31:26.954099", "step": 5418, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:26.982838", "step": 5418, "epoch": 3 }, { "type": "loss", "content": 0.001431368524208665, "timestamp": "2025-09-10 02:31:26.985716", "step": 5419, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.024474", "step": 5419, "epoch": 3 }, { "type": "loss", "content": 0.0009165522642433643, "timestamp": "2025-09-10 02:31:27.054829", "step": 5420, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:27.085200", "step": 5420, "epoch": 3 }, { "type": "loss", "content": 0.002991889836266637, "timestamp": "2025-09-10 02:31:27.087406", "step": 5421, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.116902", "step": 5421, "epoch": 3 }, { "type": "loss", "content": 0.0011327136307954788, "timestamp": "2025-09-10 02:31:27.118860", "step": 5422, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.150621", "step": 5422, "epoch": 3 }, { "type": "loss", "content": 0.00021207370446063578, "timestamp": "2025-09-10 02:31:27.158499", "step": 5423, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.191042", "step": 5423, "epoch": 3 }, { "type": "loss", "content": 0.0027875422965735197, "timestamp": "2025-09-10 02:31:27.216448", "step": 5424, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.246280", "step": 5424, "epoch": 3 }, { "type": "loss", "content": 0.0001328000653302297, "timestamp": "2025-09-10 02:31:27.248384", "step": 5425, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.278931", "step": 5425, "epoch": 3 }, { "type": "loss", "content": 0.0025467739906162024, "timestamp": "2025-09-10 02:31:27.281134", "step": 5426, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.311445", "step": 5426, "epoch": 3 }, { "type": "loss", "content": 0.0028369533829391003, "timestamp": "2025-09-10 02:31:27.313875", "step": 5427, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.342705", "step": 5427, "epoch": 3 }, { "type": "loss", "content": 0.010322848334908485, "timestamp": "2025-09-10 02:31:27.370233", "step": 5428, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.399460", "step": 5428, "epoch": 3 }, { "type": "loss", "content": 0.0004985965206287801, "timestamp": "2025-09-10 02:31:27.401479", "step": 5429, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.442792", "step": 5429, "epoch": 3 }, { "type": "loss", "content": 0.0024056960828602314, "timestamp": "2025-09-10 02:31:27.444562", "step": 5430, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:27.473692", "step": 5430, "epoch": 3 }, { "type": "loss", "content": 0.0010101028019562364, "timestamp": "2025-09-10 02:31:27.475638", "step": 5431, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:27.504217", "step": 5431, "epoch": 3 }, { "type": "loss", "content": 0.0004391186812426895, "timestamp": "2025-09-10 02:31:27.531582", "step": 5432, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.560409", "step": 5432, "epoch": 3 }, { "type": "loss", "content": 0.0015307767316699028, "timestamp": "2025-09-10 02:31:27.562282", "step": 5433, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.593316", "step": 5433, "epoch": 3 }, { "type": "loss", "content": 0.009664238430559635, "timestamp": "2025-09-10 02:31:27.596673", "step": 5434, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:27.629062", "step": 5434, "epoch": 3 }, { "type": "loss", "content": 0.045008838176727295, "timestamp": "2025-09-10 02:31:27.631117", "step": 5435, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.663540", "step": 5435, "epoch": 3 }, { "type": "loss", "content": 0.0011996644316241145, "timestamp": "2025-09-10 02:31:27.687543", "step": 5436, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:27.716411", "step": 5436, "epoch": 3 }, { "type": "loss", "content": 0.0003019919095095247, "timestamp": "2025-09-10 02:31:27.718376", "step": 5437, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:27.749022", "step": 5437, "epoch": 3 }, { "type": "loss", "content": 0.004851988051086664, "timestamp": "2025-09-10 02:31:27.751149", "step": 5438, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.780038", "step": 5438, "epoch": 3 }, { "type": "loss", "content": 0.00012647596304304898, "timestamp": "2025-09-10 02:31:27.782089", "step": 5439, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.810878", "step": 5439, "epoch": 3 }, { "type": "loss", "content": 9.743525879457593e-05, "timestamp": "2025-09-10 02:31:27.834312", "step": 5440, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:27.865423", "step": 5440, "epoch": 3 }, { "type": "loss", "content": 0.0018475401448085904, "timestamp": "2025-09-10 02:31:27.867279", "step": 5441, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.895647", "step": 5441, "epoch": 3 }, { "type": "loss", "content": 0.06767947226762772, "timestamp": "2025-09-10 02:31:27.899172", "step": 5442, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.928996", "step": 5442, "epoch": 3 }, { "type": "loss", "content": 0.003510425565764308, "timestamp": "2025-09-10 02:31:27.933140", "step": 5443, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:27.962555", "step": 5443, "epoch": 3 }, { "type": "loss", "content": 0.0025570436846464872, "timestamp": "2025-09-10 02:31:27.986290", "step": 5444, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.015107", "step": 5444, "epoch": 3 }, { "type": "loss", "content": 0.00011262608313700184, "timestamp": "2025-09-10 02:31:28.017193", "step": 5445, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.046305", "step": 5445, "epoch": 3 }, { "type": "loss", "content": 0.00039159305742941797, "timestamp": "2025-09-10 02:31:28.048214", "step": 5446, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.076983", "step": 5446, "epoch": 3 }, { "type": "loss", "content": 0.0006352835334837437, "timestamp": "2025-09-10 02:31:28.079035", "step": 5447, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.107689", "step": 5447, "epoch": 3 }, { "type": "loss", "content": 0.002199439564719796, "timestamp": "2025-09-10 02:31:28.133621", "step": 5448, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.162487", "step": 5448, "epoch": 3 }, { "type": "loss", "content": 0.013469723053276539, "timestamp": "2025-09-10 02:31:28.164474", "step": 5449, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.193270", "step": 5449, "epoch": 3 }, { "type": "loss", "content": 0.017483588308095932, "timestamp": "2025-09-10 02:31:28.195148", "step": 5450, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.224483", "step": 5450, "epoch": 3 }, { "type": "loss", "content": 0.0002191327657783404, "timestamp": "2025-09-10 02:31:28.226292", "step": 5451, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.255419", "step": 5451, "epoch": 3 }, { "type": "loss", "content": 0.0001765905908541754, "timestamp": "2025-09-10 02:31:28.278771", "step": 5452, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:28.307886", "step": 5452, "epoch": 3 }, { "type": "loss", "content": 0.00013448033132590353, "timestamp": "2025-09-10 02:31:28.310131", "step": 5453, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.339054", "step": 5453, "epoch": 3 }, { "type": "loss", "content": 0.005181503016501665, "timestamp": "2025-09-10 02:31:28.341227", "step": 5454, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.370507", "step": 5454, "epoch": 3 }, { "type": "loss", "content": 0.00517942663282156, "timestamp": "2025-09-10 02:31:28.373346", "step": 5455, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.402330", "step": 5455, "epoch": 3 }, { "type": "loss", "content": 0.006793874315917492, "timestamp": "2025-09-10 02:31:28.425930", "step": 5456, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:28.454436", "step": 5456, "epoch": 3 }, { "type": "loss", "content": 0.00022978255583439022, "timestamp": "2025-09-10 02:31:28.456438", "step": 5457, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.485551", "step": 5457, "epoch": 3 }, { "type": "loss", "content": 0.001192291034385562, "timestamp": "2025-09-10 02:31:28.488072", "step": 5458, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.517368", "step": 5458, "epoch": 3 }, { "type": "loss", "content": 0.005229536443948746, "timestamp": "2025-09-10 02:31:28.519342", "step": 5459, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.549096", "step": 5459, "epoch": 3 }, { "type": "loss", "content": 0.001903381198644638, "timestamp": "2025-09-10 02:31:28.572989", "step": 5460, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:31:28.602682", "step": 5460, "epoch": 3 }, { "type": "loss", "content": 0.001068693003617227, "timestamp": "2025-09-10 02:31:28.604858", "step": 5461, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.634936", "step": 5461, "epoch": 3 }, { "type": "loss", "content": 0.002789911115542054, "timestamp": "2025-09-10 02:31:28.636955", "step": 5462, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.666027", "step": 5462, "epoch": 3 }, { "type": "loss", "content": 0.0007820177124813199, "timestamp": "2025-09-10 02:31:28.668117", "step": 5463, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.696562", "step": 5463, "epoch": 3 }, { "type": "loss", "content": 0.0049674129113554955, "timestamp": "2025-09-10 02:31:28.720063", "step": 5464, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.749006", "step": 5464, "epoch": 3 }, { "type": "loss", "content": 0.00047011018614284694, "timestamp": "2025-09-10 02:31:28.751058", "step": 5465, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.780409", "step": 5465, "epoch": 3 }, { "type": "loss", "content": 0.013229615055024624, "timestamp": "2025-09-10 02:31:28.782356", "step": 5466, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.811414", "step": 5466, "epoch": 3 }, { "type": "loss", "content": 0.0001901341456687078, "timestamp": "2025-09-10 02:31:28.813414", "step": 5467, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:28.842356", "step": 5467, "epoch": 3 }, { "type": "loss", "content": 0.00024926214246079326, "timestamp": "2025-09-10 02:31:28.866168", "step": 5468, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.895631", "step": 5468, "epoch": 3 }, { "type": "loss", "content": 0.009387032128870487, "timestamp": "2025-09-10 02:31:28.897605", "step": 5469, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:28.926679", "step": 5469, "epoch": 3 }, { "type": "loss", "content": 0.0005769426352344453, "timestamp": "2025-09-10 02:31:28.928665", "step": 5470, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.957528", "step": 5470, "epoch": 3 }, { "type": "loss", "content": 0.0049220966175198555, "timestamp": "2025-09-10 02:31:28.959662", "step": 5471, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:28.988579", "step": 5471, "epoch": 3 }, { "type": "loss", "content": 0.0017110798507928848, "timestamp": "2025-09-10 02:31:29.012069", "step": 5472, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:31:31.144539", "step": 5472, "epoch": 3 }, { "type": "pplx", "content": 2678997.8009577156, "timestamp": "2025-09-10 02:31:31.146492", "step": 5472, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:31.175273", "step": 5472, "epoch": 3 }, { "type": "loss", "content": 0.04426982253789902, "timestamp": "2025-09-10 02:31:31.177258", "step": 5473, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:31.206905", "step": 5473, "epoch": 3 }, { "type": "loss", "content": 0.023119186982512474, "timestamp": "2025-09-10 02:31:31.208802", "step": 5474, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.237861", "step": 5474, "epoch": 3 }, { "type": "loss", "content": 0.00042799237417057157, "timestamp": "2025-09-10 02:31:31.240169", "step": 5475, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.269421", "step": 5475, "epoch": 3 }, { "type": "loss", "content": 9.680674702394754e-05, "timestamp": "2025-09-10 02:31:31.293075", "step": 5476, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.321893", "step": 5476, "epoch": 3 }, { "type": "loss", "content": 0.0021623498760163784, "timestamp": "2025-09-10 02:31:31.323995", "step": 5477, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:31.352694", "step": 5477, "epoch": 3 }, { "type": "loss", "content": 0.0004191426560282707, "timestamp": "2025-09-10 02:31:31.354790", "step": 5478, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.383501", "step": 5478, "epoch": 3 }, { "type": "loss", "content": 4.297324994695373e-05, "timestamp": "2025-09-10 02:31:31.385571", "step": 5479, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.414170", "step": 5479, "epoch": 3 }, { "type": "loss", "content": 0.046055715531110764, "timestamp": "2025-09-10 02:31:31.437751", "step": 5480, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.466686", "step": 5480, "epoch": 3 }, { "type": "loss", "content": 0.0018844603328034282, "timestamp": "2025-09-10 02:31:31.468842", "step": 5481, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:31.497575", "step": 5481, "epoch": 3 }, { "type": "loss", "content": 0.0012828157050535083, "timestamp": "2025-09-10 02:31:31.499538", "step": 5482, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.528295", "step": 5482, "epoch": 3 }, { "type": "loss", "content": 0.00011052561603719369, "timestamp": "2025-09-10 02:31:31.530201", "step": 5483, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.558870", "step": 5483, "epoch": 3 }, { "type": "loss", "content": 0.0008187596104107797, "timestamp": "2025-09-10 02:31:31.582458", "step": 5484, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.611416", "step": 5484, "epoch": 3 }, { "type": "loss", "content": 0.00011922647536266595, "timestamp": "2025-09-10 02:31:31.613298", "step": 5485, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.642297", "step": 5485, "epoch": 3 }, { "type": "loss", "content": 0.008758625946938992, "timestamp": "2025-09-10 02:31:31.644233", "step": 5486, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.673146", "step": 5486, "epoch": 3 }, { "type": "loss", "content": 0.00020699352899100631, "timestamp": "2025-09-10 02:31:31.675153", "step": 5487, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.703751", "step": 5487, "epoch": 3 }, { "type": "loss", "content": 0.0006481898599304259, "timestamp": "2025-09-10 02:31:31.727219", "step": 5488, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.755910", "step": 5488, "epoch": 3 }, { "type": "loss", "content": 0.00026211151271127164, "timestamp": "2025-09-10 02:31:31.757785", "step": 5489, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.786360", "step": 5489, "epoch": 3 }, { "type": "loss", "content": 0.005063667893409729, "timestamp": "2025-09-10 02:31:31.788379", "step": 5490, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.817161", "step": 5490, "epoch": 3 }, { "type": "loss", "content": 0.06866239011287689, "timestamp": "2025-09-10 02:31:31.819107", "step": 5491, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.847617", "step": 5491, "epoch": 3 }, { "type": "loss", "content": 0.005610947962850332, "timestamp": "2025-09-10 02:31:31.871282", "step": 5492, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:31.900024", "step": 5492, "epoch": 3 }, { "type": "loss", "content": 9.809240873437375e-05, "timestamp": "2025-09-10 02:31:31.902020", "step": 5493, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:31.930407", "step": 5493, "epoch": 3 }, { "type": "loss", "content": 0.0005431465106084943, "timestamp": "2025-09-10 02:31:31.932359", "step": 5494, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:31.961192", "step": 5494, "epoch": 3 }, { "type": "loss", "content": 0.049388524144887924, "timestamp": "2025-09-10 02:31:31.963537", "step": 5495, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:31.992105", "step": 5495, "epoch": 3 }, { "type": "loss", "content": 0.04355853796005249, "timestamp": "2025-09-10 02:31:32.017022", "step": 5496, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:32.046096", "step": 5496, "epoch": 3 }, { "type": "loss", "content": 0.00019289494957774878, "timestamp": "2025-09-10 02:31:32.048115", "step": 5497, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:32.076691", "step": 5497, "epoch": 3 }, { "type": "loss", "content": 0.00040106798405759037, "timestamp": "2025-09-10 02:31:32.078616", "step": 5498, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:32.107028", "step": 5498, "epoch": 3 }, { "type": "loss", "content": 0.0005221262690611184, "timestamp": "2025-09-10 02:31:32.108971", "step": 5499, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:32.137655", "step": 5499, "epoch": 3 }, { "type": "loss", "content": 0.00019806763157248497, "timestamp": "2025-09-10 02:31:32.161120", "step": 5500, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 5500", "timestamp": "2025-09-10 02:31:36.469939", "step": 5500, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.500694", "step": 5500, "epoch": 3 }, { "type": "loss", "content": 0.00017940241377800703, "timestamp": "2025-09-10 02:31:36.502773", "step": 5501, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.532246", "step": 5501, "epoch": 3 }, { "type": "loss", "content": 0.0007619414827786386, "timestamp": "2025-09-10 02:31:36.534214", "step": 5502, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.563383", "step": 5502, "epoch": 3 }, { "type": "loss", "content": 0.0015142976772040129, "timestamp": "2025-09-10 02:31:36.565867", "step": 5503, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.594870", "step": 5503, "epoch": 3 }, { "type": "loss", "content": 0.001050888909958303, "timestamp": "2025-09-10 02:31:36.618879", "step": 5504, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.648030", "step": 5504, "epoch": 3 }, { "type": "loss", "content": 0.0003561497724149376, "timestamp": "2025-09-10 02:31:36.650493", "step": 5505, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.680271", "step": 5505, "epoch": 3 }, { "type": "loss", "content": 0.0011121274437755346, "timestamp": "2025-09-10 02:31:36.682192", "step": 5506, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.711019", "step": 5506, "epoch": 3 }, { "type": "loss", "content": 0.00017145891615655273, "timestamp": "2025-09-10 02:31:36.713232", "step": 5507, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.742917", "step": 5507, "epoch": 3 }, { "type": "loss", "content": 0.011950437910854816, "timestamp": "2025-09-10 02:31:36.766327", "step": 5508, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.794960", "step": 5508, "epoch": 3 }, { "type": "loss", "content": 8.530188642907888e-05, "timestamp": "2025-09-10 02:31:36.796867", "step": 5509, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.825483", "step": 5509, "epoch": 3 }, { "type": "loss", "content": 0.02681620605289936, "timestamp": "2025-09-10 02:31:36.827783", "step": 5510, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:36.857136", "step": 5510, "epoch": 3 }, { "type": "loss", "content": 0.0032190450001507998, "timestamp": "2025-09-10 02:31:36.859190", "step": 5511, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.888137", "step": 5511, "epoch": 3 }, { "type": "loss", "content": 0.0009218252380378544, "timestamp": "2025-09-10 02:31:36.911653", "step": 5512, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.944105", "step": 5512, "epoch": 3 }, { "type": "loss", "content": 0.0005729981930926442, "timestamp": "2025-09-10 02:31:36.945948", "step": 5513, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:36.974995", "step": 5513, "epoch": 3 }, { "type": "loss", "content": 0.014555713161826134, "timestamp": "2025-09-10 02:31:36.977012", "step": 5514, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.005690", "step": 5514, "epoch": 3 }, { "type": "loss", "content": 0.027327081188559532, "timestamp": "2025-09-10 02:31:37.007934", "step": 5515, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.036579", "step": 5515, "epoch": 3 }, { "type": "loss", "content": 0.0005217579309828579, "timestamp": "2025-09-10 02:31:37.060221", "step": 5516, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.089693", "step": 5516, "epoch": 3 }, { "type": "loss", "content": 0.007404877804219723, "timestamp": "2025-09-10 02:31:37.091674", "step": 5517, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:37.120560", "step": 5517, "epoch": 3 }, { "type": "loss", "content": 0.0016886789817363024, "timestamp": "2025-09-10 02:31:37.123877", "step": 5518, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.153073", "step": 5518, "epoch": 3 }, { "type": "loss", "content": 0.0016022982308641076, "timestamp": "2025-09-10 02:31:37.155176", "step": 5519, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.183978", "step": 5519, "epoch": 3 }, { "type": "loss", "content": 0.0008979992708191276, "timestamp": "2025-09-10 02:31:37.207615", "step": 5520, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:37.236546", "step": 5520, "epoch": 3 }, { "type": "loss", "content": 0.0015828582691028714, "timestamp": "2025-09-10 02:31:37.238416", "step": 5521, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.267180", "step": 5521, "epoch": 3 }, { "type": "loss", "content": 0.000788357516285032, "timestamp": "2025-09-10 02:31:37.269264", "step": 5522, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.298092", "step": 5522, "epoch": 3 }, { "type": "loss", "content": 0.0002860610547941178, "timestamp": "2025-09-10 02:31:37.300013", "step": 5523, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:37.328622", "step": 5523, "epoch": 3 }, { "type": "loss", "content": 0.0002962402650155127, "timestamp": "2025-09-10 02:31:37.352070", "step": 5524, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.381132", "step": 5524, "epoch": 3 }, { "type": "loss", "content": 0.0002550986537244171, "timestamp": "2025-09-10 02:31:37.383123", "step": 5525, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.411856", "step": 5525, "epoch": 3 }, { "type": "loss", "content": 0.0012577223824337125, "timestamp": "2025-09-10 02:31:37.413694", "step": 5526, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.442658", "step": 5526, "epoch": 3 }, { "type": "loss", "content": 0.0015119662275537848, "timestamp": "2025-09-10 02:31:37.444540", "step": 5527, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.473192", "step": 5527, "epoch": 3 }, { "type": "loss", "content": 0.0013533987803384662, "timestamp": "2025-09-10 02:31:37.496516", "step": 5528, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.525880", "step": 5528, "epoch": 3 }, { "type": "loss", "content": 0.007477410137653351, "timestamp": "2025-09-10 02:31:37.527877", "step": 5529, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:37.556486", "step": 5529, "epoch": 3 }, { "type": "loss", "content": 0.000659845769405365, "timestamp": "2025-09-10 02:31:37.558440", "step": 5530, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.587192", "step": 5530, "epoch": 3 }, { "type": "loss", "content": 0.00010641255357768387, "timestamp": "2025-09-10 02:31:37.589061", "step": 5531, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.617827", "step": 5531, "epoch": 3 }, { "type": "loss", "content": 0.00041397142922505736, "timestamp": "2025-09-10 02:31:37.641401", "step": 5532, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.670874", "step": 5532, "epoch": 3 }, { "type": "loss", "content": 0.0022644626442342997, "timestamp": "2025-09-10 02:31:37.673112", "step": 5533, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.704029", "step": 5533, "epoch": 3 }, { "type": "loss", "content": 0.00990013126283884, "timestamp": "2025-09-10 02:31:37.706077", "step": 5534, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.734718", "step": 5534, "epoch": 3 }, { "type": "loss", "content": 0.0009291375754401088, "timestamp": "2025-09-10 02:31:37.737044", "step": 5535, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.766338", "step": 5535, "epoch": 3 }, { "type": "loss", "content": 0.0009419794077984989, "timestamp": "2025-09-10 02:31:37.789976", "step": 5536, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.818975", "step": 5536, "epoch": 3 }, { "type": "loss", "content": 0.00018296926282346249, "timestamp": "2025-09-10 02:31:37.820826", "step": 5537, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:37.849359", "step": 5537, "epoch": 3 }, { "type": "loss", "content": 0.0002313188451807946, "timestamp": "2025-09-10 02:31:37.851327", "step": 5538, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.880440", "step": 5538, "epoch": 3 }, { "type": "loss", "content": 0.009750072844326496, "timestamp": "2025-09-10 02:31:37.882429", "step": 5539, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:37.911477", "step": 5539, "epoch": 3 }, { "type": "loss", "content": 0.001343383570201695, "timestamp": "2025-09-10 02:31:37.935027", "step": 5540, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.964208", "step": 5540, "epoch": 3 }, { "type": "loss", "content": 0.0014413215685635805, "timestamp": "2025-09-10 02:31:37.966078", "step": 5541, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:37.994803", "step": 5541, "epoch": 3 }, { "type": "loss", "content": 0.00017327792011201382, "timestamp": "2025-09-10 02:31:37.996703", "step": 5542, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.025334", "step": 5542, "epoch": 3 }, { "type": "loss", "content": 0.000250210432568565, "timestamp": "2025-09-10 02:31:38.027225", "step": 5543, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.055921", "step": 5543, "epoch": 3 }, { "type": "loss", "content": 0.00019978173077106476, "timestamp": "2025-09-10 02:31:38.079378", "step": 5544, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.107987", "step": 5544, "epoch": 3 }, { "type": "loss", "content": 0.0015174195868894458, "timestamp": "2025-09-10 02:31:38.109981", "step": 5545, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.138532", "step": 5545, "epoch": 3 }, { "type": "loss", "content": 0.003827376291155815, "timestamp": "2025-09-10 02:31:38.140393", "step": 5546, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.169730", "step": 5546, "epoch": 3 }, { "type": "loss", "content": 0.00042572617530822754, "timestamp": "2025-09-10 02:31:38.171836", "step": 5547, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:38.202721", "step": 5547, "epoch": 3 }, { "type": "loss", "content": 0.023560727015137672, "timestamp": "2025-09-10 02:31:38.226345", "step": 5548, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.256053", "step": 5548, "epoch": 3 }, { "type": "loss", "content": 0.0008844585972838104, "timestamp": "2025-09-10 02:31:38.258014", "step": 5549, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.287223", "step": 5549, "epoch": 3 }, { "type": "loss", "content": 0.0008046877337619662, "timestamp": "2025-09-10 02:31:38.289098", "step": 5550, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.318237", "step": 5550, "epoch": 3 }, { "type": "loss", "content": 0.004426827188581228, "timestamp": "2025-09-10 02:31:38.320254", "step": 5551, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.349404", "step": 5551, "epoch": 3 }, { "type": "loss", "content": 0.0012104782508686185, "timestamp": "2025-09-10 02:31:38.372979", "step": 5552, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.402790", "step": 5552, "epoch": 3 }, { "type": "loss", "content": 0.00035136216320097446, "timestamp": "2025-09-10 02:31:38.404678", "step": 5553, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.433403", "step": 5553, "epoch": 3 }, { "type": "loss", "content": 0.0009261313825845718, "timestamp": "2025-09-10 02:31:38.435411", "step": 5554, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.463835", "step": 5554, "epoch": 3 }, { "type": "loss", "content": 0.0003515938005875796, "timestamp": "2025-09-10 02:31:38.465680", "step": 5555, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.494063", "step": 5555, "epoch": 3 }, { "type": "loss", "content": 8.098541002254933e-05, "timestamp": "2025-09-10 02:31:38.517458", "step": 5556, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.546566", "step": 5556, "epoch": 3 }, { "type": "loss", "content": 0.004003607667982578, "timestamp": "2025-09-10 02:31:38.548586", "step": 5557, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.577592", "step": 5557, "epoch": 3 }, { "type": "loss", "content": 0.0034106073435395956, "timestamp": "2025-09-10 02:31:38.579418", "step": 5558, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.607492", "step": 5558, "epoch": 3 }, { "type": "loss", "content": 0.00020386728283483535, "timestamp": "2025-09-10 02:31:38.609556", "step": 5559, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.639061", "step": 5559, "epoch": 3 }, { "type": "loss", "content": 0.0012458580313250422, "timestamp": "2025-09-10 02:31:38.662476", "step": 5560, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.691187", "step": 5560, "epoch": 3 }, { "type": "loss", "content": 0.0059364731423556805, "timestamp": "2025-09-10 02:31:38.693245", "step": 5561, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.722834", "step": 5561, "epoch": 3 }, { "type": "loss", "content": 0.0035293761175125837, "timestamp": "2025-09-10 02:31:38.725144", "step": 5562, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.754230", "step": 5562, "epoch": 3 }, { "type": "loss", "content": 0.0002680100442375988, "timestamp": "2025-09-10 02:31:38.756216", "step": 5563, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.785083", "step": 5563, "epoch": 3 }, { "type": "loss", "content": 0.002597348066046834, "timestamp": "2025-09-10 02:31:38.808524", "step": 5564, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.837864", "step": 5564, "epoch": 3 }, { "type": "loss", "content": 0.00014817291230428964, "timestamp": "2025-09-10 02:31:38.839884", "step": 5565, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:38.868866", "step": 5565, "epoch": 3 }, { "type": "loss", "content": 9.81853881967254e-05, "timestamp": "2025-09-10 02:31:38.870909", "step": 5566, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.899810", "step": 5566, "epoch": 3 }, { "type": "loss", "content": 0.013567962683737278, "timestamp": "2025-09-10 02:31:38.901709", "step": 5567, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.930354", "step": 5567, "epoch": 3 }, { "type": "loss", "content": 0.00038412483991123736, "timestamp": "2025-09-10 02:31:38.953775", "step": 5568, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:38.982570", "step": 5568, "epoch": 3 }, { "type": "loss", "content": 0.0004490498104132712, "timestamp": "2025-09-10 02:31:38.984379", "step": 5569, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.013393", "step": 5569, "epoch": 3 }, { "type": "loss", "content": 0.0004223698633722961, "timestamp": "2025-09-10 02:31:39.015470", "step": 5570, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.044805", "step": 5570, "epoch": 3 }, { "type": "loss", "content": 0.004776694346219301, "timestamp": "2025-09-10 02:31:39.046909", "step": 5571, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.075707", "step": 5571, "epoch": 3 }, { "type": "loss", "content": 0.00011601136066019535, "timestamp": "2025-09-10 02:31:39.099137", "step": 5572, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.128263", "step": 5572, "epoch": 3 }, { "type": "loss", "content": 0.00019144189718645066, "timestamp": "2025-09-10 02:31:39.130070", "step": 5573, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:39.158850", "step": 5573, "epoch": 3 }, { "type": "loss", "content": 0.007319327909499407, "timestamp": "2025-09-10 02:31:39.160747", "step": 5574, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:39.189698", "step": 5574, "epoch": 3 }, { "type": "loss", "content": 0.0020018171053379774, "timestamp": "2025-09-10 02:31:39.191672", "step": 5575, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:39.220346", "step": 5575, "epoch": 3 }, { "type": "loss", "content": 0.00011518342944327742, "timestamp": "2025-09-10 02:31:39.243778", "step": 5576, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.272722", "step": 5576, "epoch": 3 }, { "type": "loss", "content": 0.0025209735613316298, "timestamp": "2025-09-10 02:31:39.274906", "step": 5577, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.303887", "step": 5577, "epoch": 3 }, { "type": "loss", "content": 0.0043420311994850636, "timestamp": "2025-09-10 02:31:39.306238", "step": 5578, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.335720", "step": 5578, "epoch": 3 }, { "type": "loss", "content": 0.0002620189916342497, "timestamp": "2025-09-10 02:31:39.337796", "step": 5579, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.366805", "step": 5579, "epoch": 3 }, { "type": "loss", "content": 0.008588447235524654, "timestamp": "2025-09-10 02:31:39.390180", "step": 5580, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:39.420049", "step": 5580, "epoch": 3 }, { "type": "loss", "content": 0.000151693748193793, "timestamp": "2025-09-10 02:31:39.421890", "step": 5581, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.450942", "step": 5581, "epoch": 3 }, { "type": "loss", "content": 0.00013074011076241732, "timestamp": "2025-09-10 02:31:39.452832", "step": 5582, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.481813", "step": 5582, "epoch": 3 }, { "type": "loss", "content": 0.0018191634444519877, "timestamp": "2025-09-10 02:31:39.483876", "step": 5583, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.512982", "step": 5583, "epoch": 3 }, { "type": "loss", "content": 5.886574581381865e-05, "timestamp": "2025-09-10 02:31:39.536497", "step": 5584, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.565344", "step": 5584, "epoch": 3 }, { "type": "loss", "content": 0.000427374237915501, "timestamp": "2025-09-10 02:31:39.567419", "step": 5585, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.596530", "step": 5585, "epoch": 3 }, { "type": "loss", "content": 0.00014232970715966076, "timestamp": "2025-09-10 02:31:39.598484", "step": 5586, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:39.627055", "step": 5586, "epoch": 3 }, { "type": "loss", "content": 0.0010161589598283172, "timestamp": "2025-09-10 02:31:39.629116", "step": 5587, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.657863", "step": 5587, "epoch": 3 }, { "type": "loss", "content": 0.004650202579796314, "timestamp": "2025-09-10 02:31:39.681381", "step": 5588, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.709693", "step": 5588, "epoch": 3 }, { "type": "loss", "content": 0.0057308003306388855, "timestamp": "2025-09-10 02:31:39.711798", "step": 5589, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.740561", "step": 5589, "epoch": 3 }, { "type": "loss", "content": 0.0004541428934317082, "timestamp": "2025-09-10 02:31:39.742519", "step": 5590, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.771333", "step": 5590, "epoch": 3 }, { "type": "loss", "content": 4.5271222916198894e-05, "timestamp": "2025-09-10 02:31:39.773370", "step": 5591, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:39.802756", "step": 5591, "epoch": 3 }, { "type": "loss", "content": 0.014283773489296436, "timestamp": "2025-09-10 02:31:39.826378", "step": 5592, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:39.855303", "step": 5592, "epoch": 3 }, { "type": "loss", "content": 0.0002525447343941778, "timestamp": "2025-09-10 02:31:39.857233", "step": 5593, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.886053", "step": 5593, "epoch": 3 }, { "type": "loss", "content": 0.0008836208726279438, "timestamp": "2025-09-10 02:31:39.887931", "step": 5594, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:39.916544", "step": 5594, "epoch": 3 }, { "type": "loss", "content": 0.00010469827248016372, "timestamp": "2025-09-10 02:31:39.919956", "step": 5595, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:39.951866", "step": 5595, "epoch": 3 }, { "type": "loss", "content": 0.00025571396690793335, "timestamp": "2025-09-10 02:31:39.975328", "step": 5596, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.005137", "step": 5596, "epoch": 3 }, { "type": "loss", "content": 0.0001397687301505357, "timestamp": "2025-09-10 02:31:40.008163", "step": 5597, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.038157", "step": 5597, "epoch": 3 }, { "type": "loss", "content": 0.012690062634646893, "timestamp": "2025-09-10 02:31:40.040057", "step": 5598, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.068691", "step": 5598, "epoch": 3 }, { "type": "loss", "content": 0.0011016997741535306, "timestamp": "2025-09-10 02:31:40.070526", "step": 5599, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.099241", "step": 5599, "epoch": 3 }, { "type": "loss", "content": 0.0002454652276355773, "timestamp": "2025-09-10 02:31:40.122630", "step": 5600, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.151811", "step": 5600, "epoch": 3 }, { "type": "loss", "content": 0.021332427859306335, "timestamp": "2025-09-10 02:31:40.153592", "step": 5601, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.182392", "step": 5601, "epoch": 3 }, { "type": "loss", "content": 0.00016384133778046817, "timestamp": "2025-09-10 02:31:40.184137", "step": 5602, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.212691", "step": 5602, "epoch": 3 }, { "type": "loss", "content": 7.321812881855294e-05, "timestamp": "2025-09-10 02:31:40.214781", "step": 5603, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.243470", "step": 5603, "epoch": 3 }, { "type": "loss", "content": 0.0007486808462999761, "timestamp": "2025-09-10 02:31:40.266794", "step": 5604, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.295633", "step": 5604, "epoch": 3 }, { "type": "loss", "content": 3.3376003557350487e-05, "timestamp": "2025-09-10 02:31:40.297563", "step": 5605, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.326652", "step": 5605, "epoch": 3 }, { "type": "loss", "content": 0.0007316760602407157, "timestamp": "2025-09-10 02:31:40.328577", "step": 5606, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.357520", "step": 5606, "epoch": 3 }, { "type": "loss", "content": 0.0020737769082188606, "timestamp": "2025-09-10 02:31:40.359640", "step": 5607, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.388693", "step": 5607, "epoch": 3 }, { "type": "loss", "content": 0.00014511286281049252, "timestamp": "2025-09-10 02:31:40.412316", "step": 5608, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.441442", "step": 5608, "epoch": 3 }, { "type": "loss", "content": 0.0005348823615349829, "timestamp": "2025-09-10 02:31:40.443377", "step": 5609, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:40.472212", "step": 5609, "epoch": 3 }, { "type": "loss", "content": 0.00016290562052745372, "timestamp": "2025-09-10 02:31:40.474036", "step": 5610, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.503205", "step": 5610, "epoch": 3 }, { "type": "loss", "content": 0.0005925578298047185, "timestamp": "2025-09-10 02:31:40.505223", "step": 5611, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.533963", "step": 5611, "epoch": 3 }, { "type": "loss", "content": 7.499481580452994e-05, "timestamp": "2025-09-10 02:31:40.557309", "step": 5612, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:40.587260", "step": 5612, "epoch": 3 }, { "type": "loss", "content": 0.00024712338927201927, "timestamp": "2025-09-10 02:31:40.589287", "step": 5613, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.618337", "step": 5613, "epoch": 3 }, { "type": "loss", "content": 7.08239313098602e-05, "timestamp": "2025-09-10 02:31:40.620209", "step": 5614, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.649286", "step": 5614, "epoch": 3 }, { "type": "loss", "content": 0.00010430154361529276, "timestamp": "2025-09-10 02:31:40.651104", "step": 5615, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.679819", "step": 5615, "epoch": 3 }, { "type": "loss", "content": 0.002154013141989708, "timestamp": "2025-09-10 02:31:40.703579", "step": 5616, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.732286", "step": 5616, "epoch": 3 }, { "type": "loss", "content": 0.007276777643710375, "timestamp": "2025-09-10 02:31:40.734264", "step": 5617, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.762943", "step": 5617, "epoch": 3 }, { "type": "loss", "content": 0.00033290876308456063, "timestamp": "2025-09-10 02:31:40.764602", "step": 5618, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.793096", "step": 5618, "epoch": 3 }, { "type": "loss", "content": 8.302512287627906e-05, "timestamp": "2025-09-10 02:31:40.794950", "step": 5619, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.823374", "step": 5619, "epoch": 3 }, { "type": "loss", "content": 0.05603466182947159, "timestamp": "2025-09-10 02:31:40.846962", "step": 5620, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.875806", "step": 5620, "epoch": 3 }, { "type": "loss", "content": 0.0015819476684555411, "timestamp": "2025-09-10 02:31:40.878005", "step": 5621, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:40.906769", "step": 5621, "epoch": 3 }, { "type": "loss", "content": 0.000151911357534118, "timestamp": "2025-09-10 02:31:40.908671", "step": 5622, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.936861", "step": 5622, "epoch": 3 }, { "type": "loss", "content": 0.04918529838323593, "timestamp": "2025-09-10 02:31:40.938667", "step": 5623, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:40.967213", "step": 5623, "epoch": 3 }, { "type": "loss", "content": 0.00021279227803461254, "timestamp": "2025-09-10 02:31:40.990567", "step": 5624, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:31:42.843387", "step": 5624, "epoch": 3 }, { "type": "pplx", "content": 2814016.08539282, "timestamp": "2025-09-10 02:31:42.845309", "step": 5624, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:42.874693", "step": 5624, "epoch": 3 }, { "type": "loss", "content": 0.00011733026622096077, "timestamp": "2025-09-10 02:31:42.876852", "step": 5625, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:42.905969", "step": 5625, "epoch": 3 }, { "type": "loss", "content": 7.083137461449951e-05, "timestamp": "2025-09-10 02:31:42.907990", "step": 5626, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:42.936869", "step": 5626, "epoch": 3 }, { "type": "loss", "content": 0.0006454582908190787, "timestamp": "2025-09-10 02:31:42.938835", "step": 5627, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:42.968156", "step": 5627, "epoch": 3 }, { "type": "loss", "content": 8.90479568624869e-05, "timestamp": "2025-09-10 02:31:42.991841", "step": 5628, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.021223", "step": 5628, "epoch": 3 }, { "type": "loss", "content": 0.0027883213479071856, "timestamp": "2025-09-10 02:31:43.023319", "step": 5629, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.052636", "step": 5629, "epoch": 3 }, { "type": "loss", "content": 0.023280059918761253, "timestamp": "2025-09-10 02:31:43.054710", "step": 5630, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.084362", "step": 5630, "epoch": 3 }, { "type": "loss", "content": 0.0001530003355583176, "timestamp": "2025-09-10 02:31:43.086365", "step": 5631, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.115704", "step": 5631, "epoch": 3 }, { "type": "loss", "content": 0.0004718450072687119, "timestamp": "2025-09-10 02:31:43.139178", "step": 5632, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.168697", "step": 5632, "epoch": 3 }, { "type": "loss", "content": 0.0001157821825472638, "timestamp": "2025-09-10 02:31:43.170735", "step": 5633, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.199701", "step": 5633, "epoch": 3 }, { "type": "loss", "content": 0.00031720413244329393, "timestamp": "2025-09-10 02:31:43.201518", "step": 5634, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:43.230866", "step": 5634, "epoch": 3 }, { "type": "loss", "content": 0.0022304304875433445, "timestamp": "2025-09-10 02:31:43.232883", "step": 5635, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.262239", "step": 5635, "epoch": 3 }, { "type": "loss", "content": 0.00013950928405392915, "timestamp": "2025-09-10 02:31:43.285561", "step": 5636, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.314389", "step": 5636, "epoch": 3 }, { "type": "loss", "content": 0.0002411556924926117, "timestamp": "2025-09-10 02:31:43.316349", "step": 5637, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.345270", "step": 5637, "epoch": 3 }, { "type": "loss", "content": 0.0008664571796543896, "timestamp": "2025-09-10 02:31:43.347009", "step": 5638, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.376272", "step": 5638, "epoch": 3 }, { "type": "loss", "content": 8.972598152467981e-05, "timestamp": "2025-09-10 02:31:43.378122", "step": 5639, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.407095", "step": 5639, "epoch": 3 }, { "type": "loss", "content": 0.0002920062397606671, "timestamp": "2025-09-10 02:31:43.430530", "step": 5640, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.459457", "step": 5640, "epoch": 3 }, { "type": "loss", "content": 0.00020197234698571265, "timestamp": "2025-09-10 02:31:43.461268", "step": 5641, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.490235", "step": 5641, "epoch": 3 }, { "type": "loss", "content": 8.653556142235175e-05, "timestamp": "2025-09-10 02:31:43.492030", "step": 5642, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.520830", "step": 5642, "epoch": 3 }, { "type": "loss", "content": 0.08047207444906235, "timestamp": "2025-09-10 02:31:43.523142", "step": 5643, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.552596", "step": 5643, "epoch": 3 }, { "type": "loss", "content": 0.00019806034106295556, "timestamp": "2025-09-10 02:31:43.576175", "step": 5644, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.610721", "step": 5644, "epoch": 3 }, { "type": "loss", "content": 8.005097333807498e-05, "timestamp": "2025-09-10 02:31:43.612601", "step": 5645, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.645544", "step": 5645, "epoch": 3 }, { "type": "loss", "content": 8.392855670535937e-05, "timestamp": "2025-09-10 02:31:43.647602", "step": 5646, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.686377", "step": 5646, "epoch": 3 }, { "type": "loss", "content": 0.0001441091444576159, "timestamp": "2025-09-10 02:31:43.688549", "step": 5647, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.726579", "step": 5647, "epoch": 3 }, { "type": "loss", "content": 0.0011947358725592494, "timestamp": "2025-09-10 02:31:43.750139", "step": 5648, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.786021", "step": 5648, "epoch": 3 }, { "type": "loss", "content": 0.00032596822711639106, "timestamp": "2025-09-10 02:31:43.787978", "step": 5649, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:43.826629", "step": 5649, "epoch": 3 }, { "type": "loss", "content": 0.00025973832816816866, "timestamp": "2025-09-10 02:31:43.828517", "step": 5650, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.857307", "step": 5650, "epoch": 3 }, { "type": "loss", "content": 0.04941494017839432, "timestamp": "2025-09-10 02:31:43.859295", "step": 5651, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.888495", "step": 5651, "epoch": 3 }, { "type": "loss", "content": 0.0008920299587771297, "timestamp": "2025-09-10 02:31:43.911974", "step": 5652, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:43.940232", "step": 5652, "epoch": 3 }, { "type": "loss", "content": 0.0006275326013565063, "timestamp": "2025-09-10 02:31:43.942024", "step": 5653, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:43.970685", "step": 5653, "epoch": 3 }, { "type": "loss", "content": 0.0006861002766527236, "timestamp": "2025-09-10 02:31:43.972663", "step": 5654, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.001443", "step": 5654, "epoch": 3 }, { "type": "loss", "content": 0.010110259056091309, "timestamp": "2025-09-10 02:31:44.003243", "step": 5655, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.032526", "step": 5655, "epoch": 3 }, { "type": "loss", "content": 7.881731289671734e-05, "timestamp": "2025-09-10 02:31:44.055964", "step": 5656, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.084872", "step": 5656, "epoch": 3 }, { "type": "loss", "content": 0.0004487546975724399, "timestamp": "2025-09-10 02:31:44.087014", "step": 5657, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.115979", "step": 5657, "epoch": 3 }, { "type": "loss", "content": 0.000640097598079592, "timestamp": "2025-09-10 02:31:44.118054", "step": 5658, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.147238", "step": 5658, "epoch": 3 }, { "type": "loss", "content": 0.00021092274982947856, "timestamp": "2025-09-10 02:31:44.149303", "step": 5659, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.178211", "step": 5659, "epoch": 3 }, { "type": "loss", "content": 0.0008118998957797885, "timestamp": "2025-09-10 02:31:44.201546", "step": 5660, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.237925", "step": 5660, "epoch": 3 }, { "type": "loss", "content": 0.015752341598272324, "timestamp": "2025-09-10 02:31:44.239656", "step": 5661, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.267927", "step": 5661, "epoch": 3 }, { "type": "loss", "content": 0.00528930826112628, "timestamp": "2025-09-10 02:31:44.269892", "step": 5662, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.299095", "step": 5662, "epoch": 3 }, { "type": "loss", "content": 0.0009915790287777781, "timestamp": "2025-09-10 02:31:44.300867", "step": 5663, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:44.329553", "step": 5663, "epoch": 3 }, { "type": "loss", "content": 0.0002180993469664827, "timestamp": "2025-09-10 02:31:44.352786", "step": 5664, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:44.382266", "step": 5664, "epoch": 3 }, { "type": "loss", "content": 0.00041963360854424536, "timestamp": "2025-09-10 02:31:44.384246", "step": 5665, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.412826", "step": 5665, "epoch": 3 }, { "type": "loss", "content": 0.0043303873389959335, "timestamp": "2025-09-10 02:31:44.414857", "step": 5666, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.443701", "step": 5666, "epoch": 3 }, { "type": "loss", "content": 0.0033870781771838665, "timestamp": "2025-09-10 02:31:44.445414", "step": 5667, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.473507", "step": 5667, "epoch": 3 }, { "type": "loss", "content": 6.146782106952742e-05, "timestamp": "2025-09-10 02:31:44.496975", "step": 5668, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:44.526086", "step": 5668, "epoch": 3 }, { "type": "loss", "content": 0.0021099084988236427, "timestamp": "2025-09-10 02:31:44.528000", "step": 5669, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.556909", "step": 5669, "epoch": 3 }, { "type": "loss", "content": 0.0007611610344611108, "timestamp": "2025-09-10 02:31:44.558660", "step": 5670, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:44.587221", "step": 5670, "epoch": 3 }, { "type": "loss", "content": 0.0006143067148514092, "timestamp": "2025-09-10 02:31:44.588982", "step": 5671, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.617265", "step": 5671, "epoch": 3 }, { "type": "loss", "content": 0.0037563766818493605, "timestamp": "2025-09-10 02:31:44.640788", "step": 5672, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.669996", "step": 5672, "epoch": 3 }, { "type": "loss", "content": 8.10855271993205e-05, "timestamp": "2025-09-10 02:31:44.671838", "step": 5673, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.700530", "step": 5673, "epoch": 3 }, { "type": "loss", "content": 0.00016959061031229794, "timestamp": "2025-09-10 02:31:44.702508", "step": 5674, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:44.730994", "step": 5674, "epoch": 3 }, { "type": "loss", "content": 0.005033043213188648, "timestamp": "2025-09-10 02:31:44.732701", "step": 5675, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.760959", "step": 5675, "epoch": 3 }, { "type": "loss", "content": 0.0006991572445258498, "timestamp": "2025-09-10 02:31:44.784300", "step": 5676, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.813237", "step": 5676, "epoch": 3 }, { "type": "loss", "content": 0.00017229022341780365, "timestamp": "2025-09-10 02:31:44.815200", "step": 5677, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:44.844700", "step": 5677, "epoch": 3 }, { "type": "loss", "content": 0.05727619677782059, "timestamp": "2025-09-10 02:31:44.846615", "step": 5678, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.875188", "step": 5678, "epoch": 3 }, { "type": "loss", "content": 0.0001972148020286113, "timestamp": "2025-09-10 02:31:44.877152", "step": 5679, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.905222", "step": 5679, "epoch": 3 }, { "type": "loss", "content": 0.00023282249458134174, "timestamp": "2025-09-10 02:31:44.928638", "step": 5680, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.957941", "step": 5680, "epoch": 3 }, { "type": "loss", "content": 0.007196842692792416, "timestamp": "2025-09-10 02:31:44.959819", "step": 5681, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:44.988732", "step": 5681, "epoch": 3 }, { "type": "loss", "content": 0.00014656053099315614, "timestamp": "2025-09-10 02:31:44.990476", "step": 5682, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.020624", "step": 5682, "epoch": 3 }, { "type": "loss", "content": 0.00015152478590607643, "timestamp": "2025-09-10 02:31:45.022868", "step": 5683, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.052195", "step": 5683, "epoch": 3 }, { "type": "loss", "content": 0.0005283069331198931, "timestamp": "2025-09-10 02:31:45.075387", "step": 5684, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.104863", "step": 5684, "epoch": 3 }, { "type": "loss", "content": 0.003561926307156682, "timestamp": "2025-09-10 02:31:45.106557", "step": 5685, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.135150", "step": 5685, "epoch": 3 }, { "type": "loss", "content": 0.002516575623303652, "timestamp": "2025-09-10 02:31:45.137208", "step": 5686, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.167014", "step": 5686, "epoch": 3 }, { "type": "loss", "content": 8.199255535146222e-05, "timestamp": "2025-09-10 02:31:45.168861", "step": 5687, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:45.200179", "step": 5687, "epoch": 3 }, { "type": "loss", "content": 5.475278521771543e-05, "timestamp": "2025-09-10 02:31:45.223740", "step": 5688, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.256873", "step": 5688, "epoch": 3 }, { "type": "loss", "content": 0.000620243779849261, "timestamp": "2025-09-10 02:31:45.259035", "step": 5689, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.287935", "step": 5689, "epoch": 3 }, { "type": "loss", "content": 0.0006482009775936604, "timestamp": "2025-09-10 02:31:45.290037", "step": 5690, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.318725", "step": 5690, "epoch": 3 }, { "type": "loss", "content": 6.253737228689715e-05, "timestamp": "2025-09-10 02:31:45.320919", "step": 5691, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.353501", "step": 5691, "epoch": 3 }, { "type": "loss", "content": 0.00028412532992661, "timestamp": "2025-09-10 02:31:45.376964", "step": 5692, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.410709", "step": 5692, "epoch": 3 }, { "type": "loss", "content": 0.0006797489477321506, "timestamp": "2025-09-10 02:31:45.412658", "step": 5693, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:45.443951", "step": 5693, "epoch": 3 }, { "type": "loss", "content": 0.00047161945258267224, "timestamp": "2025-09-10 02:31:45.445822", "step": 5694, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.477767", "step": 5694, "epoch": 3 }, { "type": "loss", "content": 7.504104723921046e-05, "timestamp": "2025-09-10 02:31:45.479546", "step": 5695, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.513638", "step": 5695, "epoch": 3 }, { "type": "loss", "content": 8.778285700827837e-05, "timestamp": "2025-09-10 02:31:45.537138", "step": 5696, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.569648", "step": 5696, "epoch": 3 }, { "type": "loss", "content": 0.009277158416807652, "timestamp": "2025-09-10 02:31:45.571474", "step": 5697, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:45.605182", "step": 5697, "epoch": 3 }, { "type": "loss", "content": 0.0006705053965561092, "timestamp": "2025-09-10 02:31:45.607223", "step": 5698, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.641179", "step": 5698, "epoch": 3 }, { "type": "loss", "content": 0.0013212297344580293, "timestamp": "2025-09-10 02:31:45.643176", "step": 5699, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:45.677485", "step": 5699, "epoch": 3 }, { "type": "loss", "content": 0.00017160980496555567, "timestamp": "2025-09-10 02:31:45.701106", "step": 5700, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:45.738028", "step": 5700, "epoch": 3 }, { "type": "loss", "content": 0.00633283331990242, "timestamp": "2025-09-10 02:31:45.740062", "step": 5701, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.774151", "step": 5701, "epoch": 3 }, { "type": "loss", "content": 0.012610764242708683, "timestamp": "2025-09-10 02:31:45.776272", "step": 5702, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.813812", "step": 5702, "epoch": 3 }, { "type": "loss", "content": 0.07728030532598495, "timestamp": "2025-09-10 02:31:45.815562", "step": 5703, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:45.845637", "step": 5703, "epoch": 3 }, { "type": "loss", "content": 0.004781925585120916, "timestamp": "2025-09-10 02:31:45.868996", "step": 5704, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.898348", "step": 5704, "epoch": 3 }, { "type": "loss", "content": 0.04369015619158745, "timestamp": "2025-09-10 02:31:45.900386", "step": 5705, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.928989", "step": 5705, "epoch": 3 }, { "type": "loss", "content": 0.00011943600111408159, "timestamp": "2025-09-10 02:31:45.930951", "step": 5706, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.959727", "step": 5706, "epoch": 3 }, { "type": "loss", "content": 0.0006643111119046807, "timestamp": "2025-09-10 02:31:45.961777", "step": 5707, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:45.990230", "step": 5707, "epoch": 3 }, { "type": "loss", "content": 0.00022974215971771628, "timestamp": "2025-09-10 02:31:46.013777", "step": 5708, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.042707", "step": 5708, "epoch": 3 }, { "type": "loss", "content": 0.0005465334397740662, "timestamp": "2025-09-10 02:31:46.044668", "step": 5709, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.074526", "step": 5709, "epoch": 3 }, { "type": "loss", "content": 0.0023475922644138336, "timestamp": "2025-09-10 02:31:46.076388", "step": 5710, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:46.105245", "step": 5710, "epoch": 3 }, { "type": "loss", "content": 0.00036916168755851686, "timestamp": "2025-09-10 02:31:46.107055", "step": 5711, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.135601", "step": 5711, "epoch": 3 }, { "type": "loss", "content": 0.0005269505199976265, "timestamp": "2025-09-10 02:31:46.159024", "step": 5712, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.188184", "step": 5712, "epoch": 3 }, { "type": "loss", "content": 0.008309791795909405, "timestamp": "2025-09-10 02:31:46.190126", "step": 5713, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.218870", "step": 5713, "epoch": 3 }, { "type": "loss", "content": 0.000230509860557504, "timestamp": "2025-09-10 02:31:46.220536", "step": 5714, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.249095", "step": 5714, "epoch": 3 }, { "type": "loss", "content": 0.00010207213199464604, "timestamp": "2025-09-10 02:31:46.251119", "step": 5715, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.279905", "step": 5715, "epoch": 3 }, { "type": "loss", "content": 0.0008101228740997612, "timestamp": "2025-09-10 02:31:46.303302", "step": 5716, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:31:46.332126", "step": 5716, "epoch": 3 }, { "type": "loss", "content": 0.00028212874894961715, "timestamp": "2025-09-10 02:31:46.334070", "step": 5717, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.362881", "step": 5717, "epoch": 3 }, { "type": "loss", "content": 0.007300230674445629, "timestamp": "2025-09-10 02:31:46.364600", "step": 5718, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.393334", "step": 5718, "epoch": 3 }, { "type": "loss", "content": 0.007928567007184029, "timestamp": "2025-09-10 02:31:46.395258", "step": 5719, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.424430", "step": 5719, "epoch": 3 }, { "type": "loss", "content": 0.0006109363748691976, "timestamp": "2025-09-10 02:31:46.447983", "step": 5720, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.477844", "step": 5720, "epoch": 3 }, { "type": "loss", "content": 0.0002473728090990335, "timestamp": "2025-09-10 02:31:46.479678", "step": 5721, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.508465", "step": 5721, "epoch": 3 }, { "type": "loss", "content": 0.0004756476264446974, "timestamp": "2025-09-10 02:31:46.510195", "step": 5722, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.539048", "step": 5722, "epoch": 3 }, { "type": "loss", "content": 0.0001675796665949747, "timestamp": "2025-09-10 02:31:46.541145", "step": 5723, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.570097", "step": 5723, "epoch": 3 }, { "type": "loss", "content": 0.0015109594678506255, "timestamp": "2025-09-10 02:31:46.593780", "step": 5724, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:46.623889", "step": 5724, "epoch": 3 }, { "type": "loss", "content": 0.0002595992118585855, "timestamp": "2025-09-10 02:31:46.625533", "step": 5725, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.654084", "step": 5725, "epoch": 3 }, { "type": "loss", "content": 8.602546586189419e-05, "timestamp": "2025-09-10 02:31:46.656194", "step": 5726, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.685045", "step": 5726, "epoch": 3 }, { "type": "loss", "content": 0.0002019908424699679, "timestamp": "2025-09-10 02:31:46.686852", "step": 5727, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.715506", "step": 5727, "epoch": 3 }, { "type": "loss", "content": 0.0012018673587590456, "timestamp": "2025-09-10 02:31:46.738983", "step": 5728, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:46.767808", "step": 5728, "epoch": 3 }, { "type": "loss", "content": 0.03411541134119034, "timestamp": "2025-09-10 02:31:46.769561", "step": 5729, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.798383", "step": 5729, "epoch": 3 }, { "type": "loss", "content": 0.03333604335784912, "timestamp": "2025-09-10 02:31:46.800214", "step": 5730, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.829696", "step": 5730, "epoch": 3 }, { "type": "loss", "content": 0.0006598546169698238, "timestamp": "2025-09-10 02:31:46.831891", "step": 5731, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:46.861057", "step": 5731, "epoch": 3 }, { "type": "loss", "content": 0.0002100792044075206, "timestamp": "2025-09-10 02:31:46.884304", "step": 5732, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.913193", "step": 5732, "epoch": 3 }, { "type": "loss", "content": 0.0016376635758206248, "timestamp": "2025-09-10 02:31:46.914935", "step": 5733, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:46.943696", "step": 5733, "epoch": 3 }, { "type": "loss", "content": 0.0001775900600478053, "timestamp": "2025-09-10 02:31:46.945537", "step": 5734, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:46.974148", "step": 5734, "epoch": 3 }, { "type": "loss", "content": 0.0016689964104443789, "timestamp": "2025-09-10 02:31:46.976102", "step": 5735, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.005222", "step": 5735, "epoch": 3 }, { "type": "loss", "content": 0.00017439691873732954, "timestamp": "2025-09-10 02:31:47.028498", "step": 5736, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.057064", "step": 5736, "epoch": 3 }, { "type": "loss", "content": 0.00033982301829382777, "timestamp": "2025-09-10 02:31:47.058925", "step": 5737, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.087924", "step": 5737, "epoch": 3 }, { "type": "loss", "content": 0.0007048108382150531, "timestamp": "2025-09-10 02:31:47.089932", "step": 5738, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.118528", "step": 5738, "epoch": 3 }, { "type": "loss", "content": 0.0036625401116907597, "timestamp": "2025-09-10 02:31:47.120499", "step": 5739, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.149082", "step": 5739, "epoch": 3 }, { "type": "loss", "content": 0.0017860140651464462, "timestamp": "2025-09-10 02:31:47.172395", "step": 5740, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.201947", "step": 5740, "epoch": 3 }, { "type": "loss", "content": 0.027387376874685287, "timestamp": "2025-09-10 02:31:47.203784", "step": 5741, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.234056", "step": 5741, "epoch": 3 }, { "type": "loss", "content": 0.01553224865347147, "timestamp": "2025-09-10 02:31:47.235775", "step": 5742, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.267071", "step": 5742, "epoch": 3 }, { "type": "loss", "content": 0.006673471070826054, "timestamp": "2025-09-10 02:31:47.268720", "step": 5743, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.297478", "step": 5743, "epoch": 3 }, { "type": "loss", "content": 0.00022507687390316278, "timestamp": "2025-09-10 02:31:47.320983", "step": 5744, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:47.353129", "step": 5744, "epoch": 3 }, { "type": "loss", "content": 0.00032787161762826145, "timestamp": "2025-09-10 02:31:47.354960", "step": 5745, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.386566", "step": 5745, "epoch": 3 }, { "type": "loss", "content": 0.00011085504229413345, "timestamp": "2025-09-10 02:31:47.388558", "step": 5746, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.422358", "step": 5746, "epoch": 3 }, { "type": "loss", "content": 0.0011449556332081556, "timestamp": "2025-09-10 02:31:47.424370", "step": 5747, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.454433", "step": 5747, "epoch": 3 }, { "type": "loss", "content": 0.0012391246855258942, "timestamp": "2025-09-10 02:31:47.477679", "step": 5748, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:47.512361", "step": 5748, "epoch": 3 }, { "type": "loss", "content": 0.036838971078395844, "timestamp": "2025-09-10 02:31:47.514148", "step": 5749, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.544998", "step": 5749, "epoch": 3 }, { "type": "loss", "content": 0.00018976135470438749, "timestamp": "2025-09-10 02:31:47.546875", "step": 5750, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.582705", "step": 5750, "epoch": 3 }, { "type": "loss", "content": 0.002059570513665676, "timestamp": "2025-09-10 02:31:47.584587", "step": 5751, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.617860", "step": 5751, "epoch": 3 }, { "type": "loss", "content": 0.005095013417303562, "timestamp": "2025-09-10 02:31:47.641312", "step": 5752, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.673022", "step": 5752, "epoch": 3 }, { "type": "loss", "content": 0.002301124855875969, "timestamp": "2025-09-10 02:31:47.674787", "step": 5753, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.712711", "step": 5753, "epoch": 3 }, { "type": "loss", "content": 7.774233381496742e-05, "timestamp": "2025-09-10 02:31:47.714989", "step": 5754, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.752301", "step": 5754, "epoch": 3 }, { "type": "loss", "content": 0.00024010757624637336, "timestamp": "2025-09-10 02:31:47.754390", "step": 5755, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.791725", "step": 5755, "epoch": 3 }, { "type": "loss", "content": 0.0002837387437466532, "timestamp": "2025-09-10 02:31:47.815135", "step": 5756, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.845576", "step": 5756, "epoch": 3 }, { "type": "loss", "content": 0.0004090330039616674, "timestamp": "2025-09-10 02:31:47.847474", "step": 5757, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.876408", "step": 5757, "epoch": 3 }, { "type": "loss", "content": 0.00031902006594464183, "timestamp": "2025-09-10 02:31:47.878312", "step": 5758, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.907084", "step": 5758, "epoch": 3 }, { "type": "loss", "content": 0.004382619168609381, "timestamp": "2025-09-10 02:31:47.909141", "step": 5759, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.937802", "step": 5759, "epoch": 3 }, { "type": "loss", "content": 0.08181292563676834, "timestamp": "2025-09-10 02:31:47.961436", "step": 5760, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:47.990622", "step": 5760, "epoch": 3 }, { "type": "loss", "content": 0.0013247819151729345, "timestamp": "2025-09-10 02:31:47.992426", "step": 5761, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.021128", "step": 5761, "epoch": 3 }, { "type": "loss", "content": 0.0007081844960339367, "timestamp": "2025-09-10 02:31:48.022663", "step": 5762, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:31:48.051528", "step": 5762, "epoch": 3 }, { "type": "loss", "content": 0.0015095778508111835, "timestamp": "2025-09-10 02:31:48.053937", "step": 5763, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.083710", "step": 5763, "epoch": 3 }, { "type": "loss", "content": 0.0020556601230055094, "timestamp": "2025-09-10 02:31:48.107152", "step": 5764, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.136711", "step": 5764, "epoch": 3 }, { "type": "loss", "content": 0.00028491643024608493, "timestamp": "2025-09-10 02:31:48.138518", "step": 5765, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.167250", "step": 5765, "epoch": 3 }, { "type": "loss", "content": 0.0002188169164583087, "timestamp": "2025-09-10 02:31:48.168985", "step": 5766, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.197866", "step": 5766, "epoch": 3 }, { "type": "loss", "content": 0.04480767622590065, "timestamp": "2025-09-10 02:31:48.199672", "step": 5767, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.228449", "step": 5767, "epoch": 3 }, { "type": "loss", "content": 0.00042242585914209485, "timestamp": "2025-09-10 02:31:48.251728", "step": 5768, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.280885", "step": 5768, "epoch": 3 }, { "type": "loss", "content": 0.0026514700148254633, "timestamp": "2025-09-10 02:31:48.282594", "step": 5769, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:48.311204", "step": 5769, "epoch": 3 }, { "type": "loss", "content": 0.012968815863132477, "timestamp": "2025-09-10 02:31:48.312947", "step": 5770, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.341817", "step": 5770, "epoch": 3 }, { "type": "loss", "content": 0.04409560188651085, "timestamp": "2025-09-10 02:31:48.343913", "step": 5771, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.374021", "step": 5771, "epoch": 3 }, { "type": "loss", "content": 0.00035515808849595487, "timestamp": "2025-09-10 02:31:48.398433", "step": 5772, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.427376", "step": 5772, "epoch": 3 }, { "type": "loss", "content": 0.0007939469651319087, "timestamp": "2025-09-10 02:31:48.429340", "step": 5773, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.458475", "step": 5773, "epoch": 3 }, { "type": "loss", "content": 0.00019278070249129087, "timestamp": "2025-09-10 02:31:48.460554", "step": 5774, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.489718", "step": 5774, "epoch": 3 }, { "type": "loss", "content": 0.00019546682597137988, "timestamp": "2025-09-10 02:31:48.491421", "step": 5775, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:48.520608", "step": 5775, "epoch": 3 }, { "type": "loss", "content": 0.0004952874151058495, "timestamp": "2025-09-10 02:31:48.544045", "step": 5776, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:31:50.410059", "step": 5776, "epoch": 3 }, { "type": "pplx", "content": 2330602.4634110588, "timestamp": "2025-09-10 02:31:50.411927", "step": 5776, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.440035", "step": 5776, "epoch": 3 }, { "type": "loss", "content": 0.0005920507828705013, "timestamp": "2025-09-10 02:31:50.441848", "step": 5777, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.471248", "step": 5777, "epoch": 3 }, { "type": "loss", "content": 0.00026921770768240094, "timestamp": "2025-09-10 02:31:50.472899", "step": 5778, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.501698", "step": 5778, "epoch": 3 }, { "type": "loss", "content": 0.0002104615414282307, "timestamp": "2025-09-10 02:31:50.503654", "step": 5779, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.532767", "step": 5779, "epoch": 3 }, { "type": "loss", "content": 0.0003762553387787193, "timestamp": "2025-09-10 02:31:50.557091", "step": 5780, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.587349", "step": 5780, "epoch": 3 }, { "type": "loss", "content": 0.0065718465484678745, "timestamp": "2025-09-10 02:31:50.589917", "step": 5781, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.619518", "step": 5781, "epoch": 3 }, { "type": "loss", "content": 0.0004872330173384398, "timestamp": "2025-09-10 02:31:50.622054", "step": 5782, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:50.651886", "step": 5782, "epoch": 3 }, { "type": "loss", "content": 0.0020292175468057394, "timestamp": "2025-09-10 02:31:50.654172", "step": 5783, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.683412", "step": 5783, "epoch": 3 }, { "type": "loss", "content": 0.00045010444591753185, "timestamp": "2025-09-10 02:31:50.706752", "step": 5784, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.736095", "step": 5784, "epoch": 3 }, { "type": "loss", "content": 0.002559373155236244, "timestamp": "2025-09-10 02:31:50.737889", "step": 5785, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.766697", "step": 5785, "epoch": 3 }, { "type": "loss", "content": 0.0009514554985798895, "timestamp": "2025-09-10 02:31:50.770075", "step": 5786, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:50.799853", "step": 5786, "epoch": 3 }, { "type": "loss", "content": 0.0012386254966259003, "timestamp": "2025-09-10 02:31:50.801607", "step": 5787, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:50.830616", "step": 5787, "epoch": 3 }, { "type": "loss", "content": 0.0008899965905584395, "timestamp": "2025-09-10 02:31:50.853985", "step": 5788, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.883341", "step": 5788, "epoch": 3 }, { "type": "loss", "content": 0.0013884452637284994, "timestamp": "2025-09-10 02:31:50.885093", "step": 5789, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.913824", "step": 5789, "epoch": 3 }, { "type": "loss", "content": 0.0001731060619931668, "timestamp": "2025-09-10 02:31:50.915673", "step": 5790, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.944182", "step": 5790, "epoch": 3 }, { "type": "loss", "content": 8.914165664464235e-05, "timestamp": "2025-09-10 02:31:50.946125", "step": 5791, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:50.975025", "step": 5791, "epoch": 3 }, { "type": "loss", "content": 0.00023134097864385694, "timestamp": "2025-09-10 02:31:50.998585", "step": 5792, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.027888", "step": 5792, "epoch": 3 }, { "type": "loss", "content": 0.0008638862636871636, "timestamp": "2025-09-10 02:31:51.029779", "step": 5793, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.058969", "step": 5793, "epoch": 3 }, { "type": "loss", "content": 0.0007284631137736142, "timestamp": "2025-09-10 02:31:51.060864", "step": 5794, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.090109", "step": 5794, "epoch": 3 }, { "type": "loss", "content": 0.0008212543907575309, "timestamp": "2025-09-10 02:31:51.091969", "step": 5795, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:51.121331", "step": 5795, "epoch": 3 }, { "type": "loss", "content": 0.0005974514642730355, "timestamp": "2025-09-10 02:31:51.144962", "step": 5796, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:51.175278", "step": 5796, "epoch": 3 }, { "type": "loss", "content": 0.00045890998444519937, "timestamp": "2025-09-10 02:31:51.177124", "step": 5797, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.206090", "step": 5797, "epoch": 3 }, { "type": "loss", "content": 0.0006132670678198338, "timestamp": "2025-09-10 02:31:51.208141", "step": 5798, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.241376", "step": 5798, "epoch": 3 }, { "type": "loss", "content": 0.006274589337408543, "timestamp": "2025-09-10 02:31:51.243271", "step": 5799, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.276803", "step": 5799, "epoch": 3 }, { "type": "loss", "content": 0.0025721543934196234, "timestamp": "2025-09-10 02:31:51.300187", "step": 5800, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.329637", "step": 5800, "epoch": 3 }, { "type": "loss", "content": 0.02713153138756752, "timestamp": "2025-09-10 02:31:51.331631", "step": 5801, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.361569", "step": 5801, "epoch": 3 }, { "type": "loss", "content": 0.005381520371884108, "timestamp": "2025-09-10 02:31:51.363456", "step": 5802, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.395179", "step": 5802, "epoch": 3 }, { "type": "loss", "content": 0.00021333516633603722, "timestamp": "2025-09-10 02:31:51.397066", "step": 5803, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.432629", "step": 5803, "epoch": 3 }, { "type": "loss", "content": 0.013148345984518528, "timestamp": "2025-09-10 02:31:51.455952", "step": 5804, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.493342", "step": 5804, "epoch": 3 }, { "type": "loss", "content": 0.0017936977092176676, "timestamp": "2025-09-10 02:31:51.495445", "step": 5805, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.529144", "step": 5805, "epoch": 3 }, { "type": "loss", "content": 0.0025234627537429333, "timestamp": "2025-09-10 02:31:51.530972", "step": 5806, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.564887", "step": 5806, "epoch": 3 }, { "type": "loss", "content": 0.029608607292175293, "timestamp": "2025-09-10 02:31:51.566854", "step": 5807, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.601288", "step": 5807, "epoch": 3 }, { "type": "loss", "content": 0.0006367245805449784, "timestamp": "2025-09-10 02:31:51.624639", "step": 5808, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.658145", "step": 5808, "epoch": 3 }, { "type": "loss", "content": 0.013967028819024563, "timestamp": "2025-09-10 02:31:51.660111", "step": 5809, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.700282", "step": 5809, "epoch": 3 }, { "type": "loss", "content": 0.038395337760448456, "timestamp": "2025-09-10 02:31:51.702246", "step": 5810, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.740215", "step": 5810, "epoch": 3 }, { "type": "loss", "content": 0.0006521128234453499, "timestamp": "2025-09-10 02:31:51.742371", "step": 5811, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.777028", "step": 5811, "epoch": 3 }, { "type": "loss", "content": 0.0009065433405339718, "timestamp": "2025-09-10 02:31:51.800571", "step": 5812, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.837874", "step": 5812, "epoch": 3 }, { "type": "loss", "content": 0.0004069570859428495, "timestamp": "2025-09-10 02:31:51.839799", "step": 5813, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.868942", "step": 5813, "epoch": 3 }, { "type": "loss", "content": 0.0006121239275671542, "timestamp": "2025-09-10 02:31:51.871505", "step": 5814, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.904491", "step": 5814, "epoch": 3 }, { "type": "loss", "content": 0.0018002677243202925, "timestamp": "2025-09-10 02:31:51.906327", "step": 5815, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.936038", "step": 5815, "epoch": 3 }, { "type": "loss", "content": 0.0005376306944526732, "timestamp": "2025-09-10 02:31:51.959424", "step": 5816, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:51.988676", "step": 5816, "epoch": 3 }, { "type": "loss", "content": 0.003357131266966462, "timestamp": "2025-09-10 02:31:51.990791", "step": 5817, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:52.020505", "step": 5817, "epoch": 3 }, { "type": "loss", "content": 0.00049630954163149, "timestamp": "2025-09-10 02:31:52.022567", "step": 5818, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:52.052230", "step": 5818, "epoch": 3 }, { "type": "loss", "content": 0.0072017270140349865, "timestamp": "2025-09-10 02:31:52.054080", "step": 5819, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:52.084707", "step": 5819, "epoch": 3 }, { "type": "loss", "content": 0.0019593555480241776, "timestamp": "2025-09-10 02:31:52.108299", "step": 5820, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:52.138735", "step": 5820, "epoch": 3 }, { "type": "loss", "content": 0.0036003238055855036, "timestamp": "2025-09-10 02:31:52.140571", "step": 5821, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.169655", "step": 5821, "epoch": 3 }, { "type": "loss", "content": 0.00046660873340442777, "timestamp": "2025-09-10 02:31:52.171643", "step": 5822, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.200737", "step": 5822, "epoch": 3 }, { "type": "loss", "content": 0.045283399522304535, "timestamp": "2025-09-10 02:31:52.202591", "step": 5823, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:52.231705", "step": 5823, "epoch": 3 }, { "type": "loss", "content": 0.0009746053256094456, "timestamp": "2025-09-10 02:31:52.255239", "step": 5824, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.285400", "step": 5824, "epoch": 3 }, { "type": "loss", "content": 0.0008497206727042794, "timestamp": "2025-09-10 02:31:52.287238", "step": 5825, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.316933", "step": 5825, "epoch": 3 }, { "type": "loss", "content": 0.0004335683770477772, "timestamp": "2025-09-10 02:31:52.319057", "step": 5826, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.348011", "step": 5826, "epoch": 3 }, { "type": "loss", "content": 0.0022922304924577475, "timestamp": "2025-09-10 02:31:52.349935", "step": 5827, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.379966", "step": 5827, "epoch": 3 }, { "type": "loss", "content": 0.0004272214137017727, "timestamp": "2025-09-10 02:31:52.404518", "step": 5828, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.434294", "step": 5828, "epoch": 3 }, { "type": "loss", "content": 0.009445090778172016, "timestamp": "2025-09-10 02:31:52.436265", "step": 5829, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.466402", "step": 5829, "epoch": 3 }, { "type": "loss", "content": 0.00040190972504206, "timestamp": "2025-09-10 02:31:52.468277", "step": 5830, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.497234", "step": 5830, "epoch": 3 }, { "type": "loss", "content": 0.0007058191695250571, "timestamp": "2025-09-10 02:31:52.499275", "step": 5831, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.528593", "step": 5831, "epoch": 3 }, { "type": "loss", "content": 0.0005436336505226791, "timestamp": "2025-09-10 02:31:52.551978", "step": 5832, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:31:52.581381", "step": 5832, "epoch": 3 }, { "type": "loss", "content": 0.0031013458501547575, "timestamp": "2025-09-10 02:31:52.583328", "step": 5833, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.613344", "step": 5833, "epoch": 3 }, { "type": "loss", "content": 0.0003365006123203784, "timestamp": "2025-09-10 02:31:52.615411", "step": 5834, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.645361", "step": 5834, "epoch": 3 }, { "type": "loss", "content": 0.002159892814233899, "timestamp": "2025-09-10 02:31:52.647335", "step": 5835, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.676897", "step": 5835, "epoch": 3 }, { "type": "loss", "content": 0.001839548465795815, "timestamp": "2025-09-10 02:31:52.700300", "step": 5836, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.730630", "step": 5836, "epoch": 3 }, { "type": "loss", "content": 0.012229649350047112, "timestamp": "2025-09-10 02:31:52.732493", "step": 5837, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.761874", "step": 5837, "epoch": 3 }, { "type": "loss", "content": 0.00215616705827415, "timestamp": "2025-09-10 02:31:52.763780", "step": 5838, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:52.792815", "step": 5838, "epoch": 3 }, { "type": "loss", "content": 0.0003829559136647731, "timestamp": "2025-09-10 02:31:52.794775", "step": 5839, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.824006", "step": 5839, "epoch": 3 }, { "type": "loss", "content": 0.0007110520382411778, "timestamp": "2025-09-10 02:31:52.847482", "step": 5840, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.877098", "step": 5840, "epoch": 3 }, { "type": "loss", "content": 0.005053516011685133, "timestamp": "2025-09-10 02:31:52.879209", "step": 5841, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.908629", "step": 5841, "epoch": 3 }, { "type": "loss", "content": 0.0011301599442958832, "timestamp": "2025-09-10 02:31:52.910385", "step": 5842, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:52.939694", "step": 5842, "epoch": 3 }, { "type": "loss", "content": 0.00040204497054219246, "timestamp": "2025-09-10 02:31:52.941414", "step": 5843, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:52.971693", "step": 5843, "epoch": 3 }, { "type": "loss", "content": 0.0005023189005441964, "timestamp": "2025-09-10 02:31:52.995426", "step": 5844, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.026918", "step": 5844, "epoch": 3 }, { "type": "loss", "content": 0.003944059368222952, "timestamp": "2025-09-10 02:31:53.028776", "step": 5845, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.057768", "step": 5845, "epoch": 3 }, { "type": "loss", "content": 0.00814901851117611, "timestamp": "2025-09-10 02:31:53.059975", "step": 5846, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.089313", "step": 5846, "epoch": 3 }, { "type": "loss", "content": 0.0005188480718061328, "timestamp": "2025-09-10 02:31:53.093668", "step": 5847, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.123245", "step": 5847, "epoch": 3 }, { "type": "loss", "content": 0.0526927225291729, "timestamp": "2025-09-10 02:31:53.146581", "step": 5848, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.178308", "step": 5848, "epoch": 3 }, { "type": "loss", "content": 0.003942835610359907, "timestamp": "2025-09-10 02:31:53.180061", "step": 5849, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.210017", "step": 5849, "epoch": 3 }, { "type": "loss", "content": 0.0014322620118036866, "timestamp": "2025-09-10 02:31:53.211834", "step": 5850, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:53.246306", "step": 5850, "epoch": 3 }, { "type": "loss", "content": 0.0004631231422536075, "timestamp": "2025-09-10 02:31:53.248151", "step": 5851, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.279559", "step": 5851, "epoch": 3 }, { "type": "loss", "content": 0.0005005900748074055, "timestamp": "2025-09-10 02:31:53.302877", "step": 5852, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.335605", "step": 5852, "epoch": 3 }, { "type": "loss", "content": 0.0005041599506512284, "timestamp": "2025-09-10 02:31:53.337428", "step": 5853, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.367372", "step": 5853, "epoch": 3 }, { "type": "loss", "content": 0.001684217480942607, "timestamp": "2025-09-10 02:31:53.373889", "step": 5854, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.412025", "step": 5854, "epoch": 3 }, { "type": "loss", "content": 0.0003901127784047276, "timestamp": "2025-09-10 02:31:53.419643", "step": 5855, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.449706", "step": 5855, "epoch": 3 }, { "type": "loss", "content": 0.006597200874239206, "timestamp": "2025-09-10 02:31:53.478440", "step": 5856, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.510928", "step": 5856, "epoch": 3 }, { "type": "loss", "content": 0.0013561425730586052, "timestamp": "2025-09-10 02:31:53.514196", "step": 5857, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:53.546577", "step": 5857, "epoch": 3 }, { "type": "loss", "content": 0.0006679397192783654, "timestamp": "2025-09-10 02:31:53.548432", "step": 5858, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.584326", "step": 5858, "epoch": 3 }, { "type": "loss", "content": 0.0003755086218006909, "timestamp": "2025-09-10 02:31:53.586891", "step": 5859, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.618786", "step": 5859, "epoch": 3 }, { "type": "loss", "content": 0.00016341873561032116, "timestamp": "2025-09-10 02:31:53.642278", "step": 5860, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.676004", "step": 5860, "epoch": 3 }, { "type": "loss", "content": 0.001248411601409316, "timestamp": "2025-09-10 02:31:53.677849", "step": 5861, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.718426", "step": 5861, "epoch": 3 }, { "type": "loss", "content": 0.050770360976457596, "timestamp": "2025-09-10 02:31:53.720181", "step": 5862, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:53.756187", "step": 5862, "epoch": 3 }, { "type": "loss", "content": 0.0010143638355657458, "timestamp": "2025-09-10 02:31:53.758185", "step": 5863, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:53.796248", "step": 5863, "epoch": 3 }, { "type": "loss", "content": 0.000550613272935152, "timestamp": "2025-09-10 02:31:53.820062", "step": 5864, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.848526", "step": 5864, "epoch": 3 }, { "type": "loss", "content": 0.0018723468529060483, "timestamp": "2025-09-10 02:31:53.850540", "step": 5865, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.879570", "step": 5865, "epoch": 3 }, { "type": "loss", "content": 0.004078911151736975, "timestamp": "2025-09-10 02:31:53.882249", "step": 5866, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.912604", "step": 5866, "epoch": 3 }, { "type": "loss", "content": 0.0042920708656311035, "timestamp": "2025-09-10 02:31:53.914421", "step": 5867, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:53.944436", "step": 5867, "epoch": 3 }, { "type": "loss", "content": 0.0004184663703199476, "timestamp": "2025-09-10 02:31:53.968016", "step": 5868, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:53.998054", "step": 5868, "epoch": 3 }, { "type": "loss", "content": 0.000920470745768398, "timestamp": "2025-09-10 02:31:54.000266", "step": 5869, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.030164", "step": 5869, "epoch": 3 }, { "type": "loss", "content": 0.0045341490767896175, "timestamp": "2025-09-10 02:31:54.032093", "step": 5870, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.060869", "step": 5870, "epoch": 3 }, { "type": "loss", "content": 0.014640615321695805, "timestamp": "2025-09-10 02:31:54.063086", "step": 5871, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:54.092417", "step": 5871, "epoch": 3 }, { "type": "loss", "content": 0.000992514076642692, "timestamp": "2025-09-10 02:31:54.115823", "step": 5872, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.145292", "step": 5872, "epoch": 3 }, { "type": "loss", "content": 0.00285705947317183, "timestamp": "2025-09-10 02:31:54.147053", "step": 5873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.176029", "step": 5873, "epoch": 3 }, { "type": "loss", "content": 0.0011258694576099515, "timestamp": "2025-09-10 02:31:54.177631", "step": 5874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.206389", "step": 5874, "epoch": 3 }, { "type": "loss", "content": 0.0012898036511614919, "timestamp": "2025-09-10 02:31:54.208384", "step": 5875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.237269", "step": 5875, "epoch": 3 }, { "type": "loss", "content": 0.002289626281708479, "timestamp": "2025-09-10 02:31:54.260714", "step": 5876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.289897", "step": 5876, "epoch": 3 }, { "type": "loss", "content": 0.0010736450785771012, "timestamp": "2025-09-10 02:31:54.291830", "step": 5877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.320492", "step": 5877, "epoch": 3 }, { "type": "loss", "content": 0.0017673444235697389, "timestamp": "2025-09-10 02:31:54.322274", "step": 5878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.352502", "step": 5878, "epoch": 3 }, { "type": "loss", "content": 0.0006449141656048596, "timestamp": "2025-09-10 02:31:54.354429", "step": 5879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.387157", "step": 5879, "epoch": 3 }, { "type": "loss", "content": 0.0019490603590384126, "timestamp": "2025-09-10 02:31:54.410394", "step": 5880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.439696", "step": 5880, "epoch": 3 }, { "type": "loss", "content": 0.03452775254845619, "timestamp": "2025-09-10 02:31:54.441554", "step": 5881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.470566", "step": 5881, "epoch": 3 }, { "type": "loss", "content": 0.0015733633190393448, "timestamp": "2025-09-10 02:31:54.472185", "step": 5882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.500917", "step": 5882, "epoch": 3 }, { "type": "loss", "content": 0.0012607196113094687, "timestamp": "2025-09-10 02:31:54.503111", "step": 5883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.532362", "step": 5883, "epoch": 3 }, { "type": "loss", "content": 0.015937503427267075, "timestamp": "2025-09-10 02:31:54.556031", "step": 5884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.586255", "step": 5884, "epoch": 3 }, { "type": "loss", "content": 0.0011639997828751802, "timestamp": "2025-09-10 02:31:54.587987", "step": 5885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:54.617817", "step": 5885, "epoch": 3 }, { "type": "loss", "content": 0.0006883523310534656, "timestamp": "2025-09-10 02:31:54.619596", "step": 5886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.648748", "step": 5886, "epoch": 3 }, { "type": "loss", "content": 0.0019631306640803814, "timestamp": "2025-09-10 02:31:54.650583", "step": 5887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.679866", "step": 5887, "epoch": 3 }, { "type": "loss", "content": 0.02031813934445381, "timestamp": "2025-09-10 02:31:54.703359", "step": 5888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:54.733765", "step": 5888, "epoch": 3 }, { "type": "loss", "content": 0.02791324444115162, "timestamp": "2025-09-10 02:31:54.735594", "step": 5889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.764386", "step": 5889, "epoch": 3 }, { "type": "loss", "content": 0.0005248067318461835, "timestamp": "2025-09-10 02:31:54.765995", "step": 5890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.794937", "step": 5890, "epoch": 3 }, { "type": "loss", "content": 0.0004645258595701307, "timestamp": "2025-09-10 02:31:54.796544", "step": 5891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:54.825292", "step": 5891, "epoch": 3 }, { "type": "loss", "content": 0.0017598795238882303, "timestamp": "2025-09-10 02:31:54.848731", "step": 5892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.879158", "step": 5892, "epoch": 3 }, { "type": "loss", "content": 0.0008864394039846957, "timestamp": "2025-09-10 02:31:54.880835", "step": 5893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.909438", "step": 5893, "epoch": 3 }, { "type": "loss", "content": 0.00020339501497801393, "timestamp": "2025-09-10 02:31:54.911332", "step": 5894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.940462", "step": 5894, "epoch": 3 }, { "type": "loss", "content": 0.024470051750540733, "timestamp": "2025-09-10 02:31:54.942378", "step": 5895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:54.971771", "step": 5895, "epoch": 3 }, { "type": "loss", "content": 0.0006051209638826549, "timestamp": "2025-09-10 02:31:54.995111", "step": 5896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.024545", "step": 5896, "epoch": 3 }, { "type": "loss", "content": 0.0007404032512567937, "timestamp": "2025-09-10 02:31:55.026184", "step": 5897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.055872", "step": 5897, "epoch": 3 }, { "type": "loss", "content": 0.00032552939956076443, "timestamp": "2025-09-10 02:31:55.057797", "step": 5898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:55.087012", "step": 5898, "epoch": 3 }, { "type": "loss", "content": 0.003726254915818572, "timestamp": "2025-09-10 02:31:55.088908", "step": 5899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.117630", "step": 5899, "epoch": 3 }, { "type": "loss", "content": 0.0005625720368698239, "timestamp": "2025-09-10 02:31:55.140716", "step": 5900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.169838", "step": 5900, "epoch": 3 }, { "type": "loss", "content": 0.0009212298900820315, "timestamp": "2025-09-10 02:31:55.171602", "step": 5901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.201818", "step": 5901, "epoch": 3 }, { "type": "loss", "content": 0.0010035239392891526, "timestamp": "2025-09-10 02:31:55.203429", "step": 5902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.236570", "step": 5902, "epoch": 3 }, { "type": "loss", "content": 0.0016566301928833127, "timestamp": "2025-09-10 02:31:55.238521", "step": 5903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.270811", "step": 5903, "epoch": 3 }, { "type": "loss", "content": 0.00018501277372706681, "timestamp": "2025-09-10 02:31:55.294073", "step": 5904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:55.323027", "step": 5904, "epoch": 3 }, { "type": "loss", "content": 0.0008375718025490642, "timestamp": "2025-09-10 02:31:55.325336", "step": 5905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.356208", "step": 5905, "epoch": 3 }, { "type": "loss", "content": 0.00016759525169618428, "timestamp": "2025-09-10 02:31:55.358284", "step": 5906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.389930", "step": 5906, "epoch": 3 }, { "type": "loss", "content": 0.006352846045047045, "timestamp": "2025-09-10 02:31:55.391800", "step": 5907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.425032", "step": 5907, "epoch": 3 }, { "type": "loss", "content": 0.0020307323429733515, "timestamp": "2025-09-10 02:31:55.448322", "step": 5908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.483577", "step": 5908, "epoch": 3 }, { "type": "loss", "content": 0.00038759110611863434, "timestamp": "2025-09-10 02:31:55.485277", "step": 5909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:55.517977", "step": 5909, "epoch": 3 }, { "type": "loss", "content": 0.0009370900806970894, "timestamp": "2025-09-10 02:31:55.519595", "step": 5910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.553212", "step": 5910, "epoch": 3 }, { "type": "loss", "content": 0.0011055589420720935, "timestamp": "2025-09-10 02:31:55.555168", "step": 5911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.590823", "step": 5911, "epoch": 3 }, { "type": "loss", "content": 0.0025008826050907373, "timestamp": "2025-09-10 02:31:55.614135", "step": 5912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.645030", "step": 5912, "epoch": 3 }, { "type": "loss", "content": 0.0004107403219677508, "timestamp": "2025-09-10 02:31:55.647052", "step": 5913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:55.683495", "step": 5913, "epoch": 3 }, { "type": "loss", "content": 0.006134289316833019, "timestamp": "2025-09-10 02:31:55.685236", "step": 5914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.724471", "step": 5914, "epoch": 3 }, { "type": "loss", "content": 0.00103579752612859, "timestamp": "2025-09-10 02:31:55.726673", "step": 5915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.761720", "step": 5915, "epoch": 3 }, { "type": "loss", "content": 0.006375829689204693, "timestamp": "2025-09-10 02:31:55.785049", "step": 5916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.823156", "step": 5916, "epoch": 3 }, { "type": "loss", "content": 0.00024464138550683856, "timestamp": "2025-09-10 02:31:55.828580", "step": 5917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.861859", "step": 5917, "epoch": 3 }, { "type": "loss", "content": 0.0005653674597851932, "timestamp": "2025-09-10 02:31:55.863555", "step": 5918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.892613", "step": 5918, "epoch": 3 }, { "type": "loss", "content": 0.013253341428935528, "timestamp": "2025-09-10 02:31:55.894386", "step": 5919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.923208", "step": 5919, "epoch": 3 }, { "type": "loss", "content": 0.0002616856654640287, "timestamp": "2025-09-10 02:31:55.946392", "step": 5920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:55.975194", "step": 5920, "epoch": 3 }, { "type": "loss", "content": 0.005493378732353449, "timestamp": "2025-09-10 02:31:55.977009", "step": 5921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:56.005628", "step": 5921, "epoch": 3 }, { "type": "loss", "content": 0.0023588540498167276, "timestamp": "2025-09-10 02:31:56.007529", "step": 5922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:56.036425", "step": 5922, "epoch": 3 }, { "type": "loss", "content": 0.0014897006331011653, "timestamp": "2025-09-10 02:31:56.038208", "step": 5923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:56.067038", "step": 5923, "epoch": 3 }, { "type": "loss", "content": 0.0002015876380028203, "timestamp": "2025-09-10 02:31:56.090331", "step": 5924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:56.124105", "step": 5924, "epoch": 3 }, { "type": "loss", "content": 0.0007182031986303627, "timestamp": "2025-09-10 02:31:56.126203", "step": 5925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:56.161037", "step": 5925, "epoch": 3 }, { "type": "loss", "content": 0.00025931946584023535, "timestamp": "2025-09-10 02:31:56.162991", "step": 5926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:56.197824", "step": 5926, "epoch": 3 }, { "type": "loss", "content": 0.0275877732783556, "timestamp": "2025-09-10 02:31:56.202446", "step": 5927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:56.234655", "step": 5927, "epoch": 3 }, { "type": "loss", "content": 0.001264668651856482, "timestamp": "2025-09-10 02:31:56.258378", "step": 5928, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:31:58.184501", "step": 5928, "epoch": 3 }, { "type": "pplx", "content": 2466542.7706270437, "timestamp": "2025-09-10 02:31:58.186501", "step": 5928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.214004", "step": 5928, "epoch": 3 }, { "type": "loss", "content": 0.002576331840828061, "timestamp": "2025-09-10 02:31:58.215811", "step": 5929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.245130", "step": 5929, "epoch": 3 }, { "type": "loss", "content": 0.00018019216076936573, "timestamp": "2025-09-10 02:31:58.246934", "step": 5930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.276054", "step": 5930, "epoch": 3 }, { "type": "loss", "content": 0.00048270850675180554, "timestamp": "2025-09-10 02:31:58.277925", "step": 5931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.307356", "step": 5931, "epoch": 3 }, { "type": "loss", "content": 0.03198447450995445, "timestamp": "2025-09-10 02:31:58.330665", "step": 5932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.359832", "step": 5932, "epoch": 3 }, { "type": "loss", "content": 0.00030957843409851193, "timestamp": "2025-09-10 02:31:58.361462", "step": 5933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.390590", "step": 5933, "epoch": 3 }, { "type": "loss", "content": 0.032051801681518555, "timestamp": "2025-09-10 02:31:58.392136", "step": 5934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.420940", "step": 5934, "epoch": 3 }, { "type": "loss", "content": 0.006861783564090729, "timestamp": "2025-09-10 02:31:58.422710", "step": 5935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.451271", "step": 5935, "epoch": 3 }, { "type": "loss", "content": 0.00024682050570845604, "timestamp": "2025-09-10 02:31:58.474823", "step": 5936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.507240", "step": 5936, "epoch": 3 }, { "type": "loss", "content": 0.01753365434706211, "timestamp": "2025-09-10 02:31:58.509197", "step": 5937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.538031", "step": 5937, "epoch": 3 }, { "type": "loss", "content": 0.0010043384972959757, "timestamp": "2025-09-10 02:31:58.539864", "step": 5938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.568838", "step": 5938, "epoch": 3 }, { "type": "loss", "content": 0.002193879336118698, "timestamp": "2025-09-10 02:31:58.570550", "step": 5939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.599443", "step": 5939, "epoch": 3 }, { "type": "loss", "content": 0.00029714597621932626, "timestamp": "2025-09-10 02:31:58.622606", "step": 5940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.651623", "step": 5940, "epoch": 3 }, { "type": "loss", "content": 0.00016494235023856163, "timestamp": "2025-09-10 02:31:58.653285", "step": 5941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.682018", "step": 5941, "epoch": 3 }, { "type": "loss", "content": 0.00010710857168305665, "timestamp": "2025-09-10 02:31:58.683825", "step": 5942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:58.712386", "step": 5942, "epoch": 3 }, { "type": "loss", "content": 0.013505185022950172, "timestamp": "2025-09-10 02:31:58.714249", "step": 5943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.743289", "step": 5943, "epoch": 3 }, { "type": "loss", "content": 0.0005287728854455054, "timestamp": "2025-09-10 02:31:58.766354", "step": 5944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.795067", "step": 5944, "epoch": 3 }, { "type": "loss", "content": 0.0002007946459343657, "timestamp": "2025-09-10 02:31:58.797069", "step": 5945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.825866", "step": 5945, "epoch": 3 }, { "type": "loss", "content": 0.001181377680040896, "timestamp": "2025-09-10 02:31:58.827841", "step": 5946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.857665", "step": 5946, "epoch": 3 }, { "type": "loss", "content": 0.0009074655245058239, "timestamp": "2025-09-10 02:31:58.859165", "step": 5947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.888086", "step": 5947, "epoch": 3 }, { "type": "loss", "content": 0.0500468946993351, "timestamp": "2025-09-10 02:31:58.911298", "step": 5948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.940518", "step": 5948, "epoch": 3 }, { "type": "loss", "content": 0.00016007278463803232, "timestamp": "2025-09-10 02:31:58.942217", "step": 5949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:58.971482", "step": 5949, "epoch": 3 }, { "type": "loss", "content": 0.0016039551701396704, "timestamp": "2025-09-10 02:31:58.973493", "step": 5950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:31:59.002691", "step": 5950, "epoch": 3 }, { "type": "loss", "content": 0.005725264549255371, "timestamp": "2025-09-10 02:31:59.004647", "step": 5951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.033503", "step": 5951, "epoch": 3 }, { "type": "loss", "content": 0.010027670301496983, "timestamp": "2025-09-10 02:31:59.056938", "step": 5952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.100968", "step": 5952, "epoch": 3 }, { "type": "loss", "content": 0.0019840996246784925, "timestamp": "2025-09-10 02:31:59.102882", "step": 5953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.131539", "step": 5953, "epoch": 3 }, { "type": "loss", "content": 0.004044392611831427, "timestamp": "2025-09-10 02:31:59.133287", "step": 5954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.162571", "step": 5954, "epoch": 3 }, { "type": "loss", "content": 0.00650023901835084, "timestamp": "2025-09-10 02:31:59.164476", "step": 5955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.195932", "step": 5955, "epoch": 3 }, { "type": "loss", "content": 0.0003615424211602658, "timestamp": "2025-09-10 02:31:59.223779", "step": 5956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.256305", "step": 5956, "epoch": 3 }, { "type": "loss", "content": 0.028546255081892014, "timestamp": "2025-09-10 02:31:59.257999", "step": 5957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:59.299945", "step": 5957, "epoch": 3 }, { "type": "loss", "content": 0.011445007286965847, "timestamp": "2025-09-10 02:31:59.301678", "step": 5958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.331151", "step": 5958, "epoch": 3 }, { "type": "loss", "content": 0.0012820158153772354, "timestamp": "2025-09-10 02:31:59.332913", "step": 5959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.361600", "step": 5959, "epoch": 3 }, { "type": "loss", "content": 0.0003934795968234539, "timestamp": "2025-09-10 02:31:59.385022", "step": 5960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.417678", "step": 5960, "epoch": 3 }, { "type": "loss", "content": 0.00023115640215110034, "timestamp": "2025-09-10 02:31:59.419492", "step": 5961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.450992", "step": 5961, "epoch": 3 }, { "type": "loss", "content": 0.0009675567853264511, "timestamp": "2025-09-10 02:31:59.452970", "step": 5962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:59.488314", "step": 5962, "epoch": 3 }, { "type": "loss", "content": 0.0015642615035176277, "timestamp": "2025-09-10 02:31:59.492027", "step": 5963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.529088", "step": 5963, "epoch": 3 }, { "type": "loss", "content": 0.0010022606002166867, "timestamp": "2025-09-10 02:31:59.552545", "step": 5964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.592894", "step": 5964, "epoch": 3 }, { "type": "loss", "content": 0.00016989861615002155, "timestamp": "2025-09-10 02:31:59.594853", "step": 5965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.627923", "step": 5965, "epoch": 3 }, { "type": "loss", "content": 0.0013952319277450442, "timestamp": "2025-09-10 02:31:59.629846", "step": 5966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.666271", "step": 5966, "epoch": 3 }, { "type": "loss", "content": 0.006576180923730135, "timestamp": "2025-09-10 02:31:59.668072", "step": 5967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.708126", "step": 5967, "epoch": 3 }, { "type": "loss", "content": 0.003177030710503459, "timestamp": "2025-09-10 02:31:59.731549", "step": 5968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.773320", "step": 5968, "epoch": 3 }, { "type": "loss", "content": 0.0003268076397944242, "timestamp": "2025-09-10 02:31:59.775306", "step": 5969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:31:59.814971", "step": 5969, "epoch": 3 }, { "type": "loss", "content": 0.001026048674248159, "timestamp": "2025-09-10 02:31:59.816842", "step": 5970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.846495", "step": 5970, "epoch": 3 }, { "type": "loss", "content": 0.0011224242625758052, "timestamp": "2025-09-10 02:31:59.848384", "step": 5971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.879190", "step": 5971, "epoch": 3 }, { "type": "loss", "content": 0.0065633621998131275, "timestamp": "2025-09-10 02:31:59.902812", "step": 5972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.934696", "step": 5972, "epoch": 3 }, { "type": "loss", "content": 0.00255127833224833, "timestamp": "2025-09-10 02:31:59.936480", "step": 5973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.965207", "step": 5973, "epoch": 3 }, { "type": "loss", "content": 0.004116647876799107, "timestamp": "2025-09-10 02:31:59.967068", "step": 5974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:31:59.995787", "step": 5974, "epoch": 3 }, { "type": "loss", "content": 0.00024878952535800636, "timestamp": "2025-09-10 02:31:59.997826", "step": 5975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:00.026627", "step": 5975, "epoch": 3 }, { "type": "loss", "content": 0.001366226002573967, "timestamp": "2025-09-10 02:32:00.049967", "step": 5976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:00.080349", "step": 5976, "epoch": 3 }, { "type": "loss", "content": 0.0002822635869961232, "timestamp": "2025-09-10 02:32:00.082394", "step": 5977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:00.111382", "step": 5977, "epoch": 3 }, { "type": "loss", "content": 0.0005551199428737164, "timestamp": "2025-09-10 02:32:00.113076", "step": 5978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.142619", "step": 5978, "epoch": 3 }, { "type": "loss", "content": 0.0003631310537457466, "timestamp": "2025-09-10 02:32:00.144683", "step": 5979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:00.174555", "step": 5979, "epoch": 3 }, { "type": "loss", "content": 0.00014048561570234597, "timestamp": "2025-09-10 02:32:00.198000", "step": 5980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.227229", "step": 5980, "epoch": 3 }, { "type": "loss", "content": 0.00020421433146111667, "timestamp": "2025-09-10 02:32:00.229054", "step": 5981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.257730", "step": 5981, "epoch": 3 }, { "type": "loss", "content": 0.0002952653740067035, "timestamp": "2025-09-10 02:32:00.259577", "step": 5982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.288274", "step": 5982, "epoch": 3 }, { "type": "loss", "content": 0.004466590005904436, "timestamp": "2025-09-10 02:32:00.290089", "step": 5983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.319073", "step": 5983, "epoch": 3 }, { "type": "loss", "content": 0.00020575194503180683, "timestamp": "2025-09-10 02:32:00.342312", "step": 5984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.371529", "step": 5984, "epoch": 3 }, { "type": "loss", "content": 0.00958172231912613, "timestamp": "2025-09-10 02:32:00.373424", "step": 5985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.403076", "step": 5985, "epoch": 3 }, { "type": "loss", "content": 0.00015885476022958755, "timestamp": "2025-09-10 02:32:00.404881", "step": 5986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.433719", "step": 5986, "epoch": 3 }, { "type": "loss", "content": 0.0006440441939048469, "timestamp": "2025-09-10 02:32:00.435877", "step": 5987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.464931", "step": 5987, "epoch": 3 }, { "type": "loss", "content": 0.00075487419962883, "timestamp": "2025-09-10 02:32:00.488267", "step": 5988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.516934", "step": 5988, "epoch": 3 }, { "type": "loss", "content": 0.0002352559968130663, "timestamp": "2025-09-10 02:32:00.518692", "step": 5989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.547333", "step": 5989, "epoch": 3 }, { "type": "loss", "content": 0.00017876646597869694, "timestamp": "2025-09-10 02:32:00.549207", "step": 5990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:00.578010", "step": 5990, "epoch": 3 }, { "type": "loss", "content": 0.0005139493150636554, "timestamp": "2025-09-10 02:32:00.579764", "step": 5991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:00.608622", "step": 5991, "epoch": 3 }, { "type": "loss", "content": 0.0018390259938314557, "timestamp": "2025-09-10 02:32:00.631911", "step": 5992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:00.660834", "step": 5992, "epoch": 3 }, { "type": "loss", "content": 0.0001986074639717117, "timestamp": "2025-09-10 02:32:00.662890", "step": 5993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.692434", "step": 5993, "epoch": 3 }, { "type": "loss", "content": 0.0002618823782540858, "timestamp": "2025-09-10 02:32:00.694355", "step": 5994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.724449", "step": 5994, "epoch": 3 }, { "type": "loss", "content": 0.00036912746145389974, "timestamp": "2025-09-10 02:32:00.726562", "step": 5995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.755651", "step": 5995, "epoch": 3 }, { "type": "loss", "content": 0.0005986754549667239, "timestamp": "2025-09-10 02:32:00.779150", "step": 5996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:00.808677", "step": 5996, "epoch": 3 }, { "type": "loss", "content": 0.0009047608473338187, "timestamp": "2025-09-10 02:32:00.810347", "step": 5997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.839162", "step": 5997, "epoch": 3 }, { "type": "loss", "content": 0.000149157625855878, "timestamp": "2025-09-10 02:32:00.840915", "step": 5998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.869577", "step": 5998, "epoch": 3 }, { "type": "loss", "content": 0.00014819527859799564, "timestamp": "2025-09-10 02:32:00.871249", "step": 5999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:00.900053", "step": 5999, "epoch": 3 }, { "type": "loss", "content": 0.0021587680093944073, "timestamp": "2025-09-10 02:32:00.923086", "step": 6000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 6000", "timestamp": "2025-09-10 02:32:05.795377", "step": 6000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:05.838231", "step": 6000, "epoch": 3 }, { "type": "loss", "content": 0.0004505121323745698, "timestamp": "2025-09-10 02:32:05.840064", "step": 6001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:05.869305", "step": 6001, "epoch": 3 }, { "type": "loss", "content": 0.000520777830388397, "timestamp": "2025-09-10 02:32:05.871307", "step": 6002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:05.900587", "step": 6002, "epoch": 3 }, { "type": "loss", "content": 0.00018003354489337653, "timestamp": "2025-09-10 02:32:05.902259", "step": 6003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:05.931803", "step": 6003, "epoch": 3 }, { "type": "loss", "content": 8.050748147070408e-05, "timestamp": "2025-09-10 02:32:05.955549", "step": 6004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:05.984504", "step": 6004, "epoch": 3 }, { "type": "loss", "content": 8.989882189780474e-05, "timestamp": "2025-09-10 02:32:05.986420", "step": 6005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.015704", "step": 6005, "epoch": 3 }, { "type": "loss", "content": 0.00030490991775877774, "timestamp": "2025-09-10 02:32:06.017549", "step": 6006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.046561", "step": 6006, "epoch": 3 }, { "type": "loss", "content": 0.001373602426610887, "timestamp": "2025-09-10 02:32:06.048419", "step": 6007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.078898", "step": 6007, "epoch": 3 }, { "type": "loss", "content": 0.001475251279771328, "timestamp": "2025-09-10 02:32:06.102602", "step": 6008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:06.132082", "step": 6008, "epoch": 3 }, { "type": "loss", "content": 0.00022059862385503948, "timestamp": "2025-09-10 02:32:06.134168", "step": 6009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.163639", "step": 6009, "epoch": 3 }, { "type": "loss", "content": 0.00019312952645123005, "timestamp": "2025-09-10 02:32:06.165340", "step": 6010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.194330", "step": 6010, "epoch": 3 }, { "type": "loss", "content": 0.0001684818707872182, "timestamp": "2025-09-10 02:32:06.196276", "step": 6011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.225644", "step": 6011, "epoch": 3 }, { "type": "loss", "content": 0.021128688007593155, "timestamp": "2025-09-10 02:32:06.249149", "step": 6012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.278088", "step": 6012, "epoch": 3 }, { "type": "loss", "content": 0.017196234315633774, "timestamp": "2025-09-10 02:32:06.280130", "step": 6013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.308736", "step": 6013, "epoch": 3 }, { "type": "loss", "content": 0.003722748253494501, "timestamp": "2025-09-10 02:32:06.310639", "step": 6014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.339383", "step": 6014, "epoch": 3 }, { "type": "loss", "content": 0.00491055753082037, "timestamp": "2025-09-10 02:32:06.341137", "step": 6015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.370304", "step": 6015, "epoch": 3 }, { "type": "loss", "content": 0.00023628502094652504, "timestamp": "2025-09-10 02:32:06.393682", "step": 6016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.422606", "step": 6016, "epoch": 3 }, { "type": "loss", "content": 0.0003611140127759427, "timestamp": "2025-09-10 02:32:06.424446", "step": 6017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.453356", "step": 6017, "epoch": 3 }, { "type": "loss", "content": 0.0001540749944979325, "timestamp": "2025-09-10 02:32:06.455102", "step": 6018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.484665", "step": 6018, "epoch": 3 }, { "type": "loss", "content": 0.01930735446512699, "timestamp": "2025-09-10 02:32:06.486472", "step": 6019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.515802", "step": 6019, "epoch": 3 }, { "type": "loss", "content": 0.0002698621538002044, "timestamp": "2025-09-10 02:32:06.539299", "step": 6020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.568267", "step": 6020, "epoch": 3 }, { "type": "loss", "content": 0.0005506637971848249, "timestamp": "2025-09-10 02:32:06.570320", "step": 6021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.599465", "step": 6021, "epoch": 3 }, { "type": "loss", "content": 0.0008389264112338424, "timestamp": "2025-09-10 02:32:06.601342", "step": 6022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.630001", "step": 6022, "epoch": 3 }, { "type": "loss", "content": 0.0007258251425810158, "timestamp": "2025-09-10 02:32:06.632009", "step": 6023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.660891", "step": 6023, "epoch": 3 }, { "type": "loss", "content": 0.001435625716112554, "timestamp": "2025-09-10 02:32:06.684351", "step": 6024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.713960", "step": 6024, "epoch": 3 }, { "type": "loss", "content": 0.00012985989451408386, "timestamp": "2025-09-10 02:32:06.715693", "step": 6025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.744482", "step": 6025, "epoch": 3 }, { "type": "loss", "content": 0.0029376517049968243, "timestamp": "2025-09-10 02:32:06.746454", "step": 6026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.775262", "step": 6026, "epoch": 3 }, { "type": "loss", "content": 0.035706792026758194, "timestamp": "2025-09-10 02:32:06.777117", "step": 6027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.805919", "step": 6027, "epoch": 3 }, { "type": "loss", "content": 0.0008659813902340829, "timestamp": "2025-09-10 02:32:06.829279", "step": 6028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.859088", "step": 6028, "epoch": 3 }, { "type": "loss", "content": 8.571484795538709e-05, "timestamp": "2025-09-10 02:32:06.860851", "step": 6029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:06.889448", "step": 6029, "epoch": 3 }, { "type": "loss", "content": 0.030303040519356728, "timestamp": "2025-09-10 02:32:06.891379", "step": 6030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:06.920440", "step": 6030, "epoch": 3 }, { "type": "loss", "content": 0.0005027134902775288, "timestamp": "2025-09-10 02:32:06.922060", "step": 6031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:06.950882", "step": 6031, "epoch": 3 }, { "type": "loss", "content": 0.00012944061018060893, "timestamp": "2025-09-10 02:32:06.974288", "step": 6032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.003277", "step": 6032, "epoch": 3 }, { "type": "loss", "content": 0.05250001698732376, "timestamp": "2025-09-10 02:32:07.005201", "step": 6033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.034322", "step": 6033, "epoch": 3 }, { "type": "loss", "content": 0.00019380975572858006, "timestamp": "2025-09-10 02:32:07.036135", "step": 6034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.064912", "step": 6034, "epoch": 3 }, { "type": "loss", "content": 0.0017998889088630676, "timestamp": "2025-09-10 02:32:07.066535", "step": 6035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.095593", "step": 6035, "epoch": 3 }, { "type": "loss", "content": 0.0006400958518497646, "timestamp": "2025-09-10 02:32:07.119021", "step": 6036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:07.148221", "step": 6036, "epoch": 3 }, { "type": "loss", "content": 0.0007847324013710022, "timestamp": "2025-09-10 02:32:07.150461", "step": 6037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.180865", "step": 6037, "epoch": 3 }, { "type": "loss", "content": 0.0006318576051853597, "timestamp": "2025-09-10 02:32:07.187546", "step": 6038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:07.221553", "step": 6038, "epoch": 3 }, { "type": "loss", "content": 0.0011142324656248093, "timestamp": "2025-09-10 02:32:07.225696", "step": 6039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.279880", "step": 6039, "epoch": 3 }, { "type": "loss", "content": 0.0009587918757461011, "timestamp": "2025-09-10 02:32:07.306949", "step": 6040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:07.352422", "step": 6040, "epoch": 3 }, { "type": "loss", "content": 0.0011987646576017141, "timestamp": "2025-09-10 02:32:07.356975", "step": 6041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.398357", "step": 6041, "epoch": 3 }, { "type": "loss", "content": 0.00021913684031460434, "timestamp": "2025-09-10 02:32:07.400289", "step": 6042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.434739", "step": 6042, "epoch": 3 }, { "type": "loss", "content": 0.0005295642768032849, "timestamp": "2025-09-10 02:32:07.436606", "step": 6043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.468585", "step": 6043, "epoch": 3 }, { "type": "loss", "content": 0.023086709901690483, "timestamp": "2025-09-10 02:32:07.491873", "step": 6044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.525746", "step": 6044, "epoch": 3 }, { "type": "loss", "content": 0.00017024595581460744, "timestamp": "2025-09-10 02:32:07.527332", "step": 6045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.559610", "step": 6045, "epoch": 3 }, { "type": "loss", "content": 0.00031210650922730565, "timestamp": "2025-09-10 02:32:07.561309", "step": 6046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.595024", "step": 6046, "epoch": 3 }, { "type": "loss", "content": 0.00016917231550905854, "timestamp": "2025-09-10 02:32:07.596887", "step": 6047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.627519", "step": 6047, "epoch": 3 }, { "type": "loss", "content": 0.0007761456654407084, "timestamp": "2025-09-10 02:32:07.650866", "step": 6048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.687084", "step": 6048, "epoch": 3 }, { "type": "loss", "content": 0.0005857849610038102, "timestamp": "2025-09-10 02:32:07.688908", "step": 6049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:07.727869", "step": 6049, "epoch": 3 }, { "type": "loss", "content": 0.0005356850451789796, "timestamp": "2025-09-10 02:32:07.729880", "step": 6050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:07.766812", "step": 6050, "epoch": 3 }, { "type": "loss", "content": 0.00037692865589633584, "timestamp": "2025-09-10 02:32:07.768772", "step": 6051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.806412", "step": 6051, "epoch": 3 }, { "type": "loss", "content": 0.0003155084268655628, "timestamp": "2025-09-10 02:32:07.829888", "step": 6052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.858316", "step": 6052, "epoch": 3 }, { "type": "loss", "content": 0.0009529165108688176, "timestamp": "2025-09-10 02:32:07.860454", "step": 6053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.889094", "step": 6053, "epoch": 3 }, { "type": "loss", "content": 0.016368014737963676, "timestamp": "2025-09-10 02:32:07.890767", "step": 6054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.919548", "step": 6054, "epoch": 3 }, { "type": "loss", "content": 0.0017024686094373465, "timestamp": "2025-09-10 02:32:07.921541", "step": 6055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:07.950547", "step": 6055, "epoch": 3 }, { "type": "loss", "content": 0.05321263521909714, "timestamp": "2025-09-10 02:32:07.973936", "step": 6056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.002769", "step": 6056, "epoch": 3 }, { "type": "loss", "content": 0.013266685418784618, "timestamp": "2025-09-10 02:32:08.004663", "step": 6057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.033701", "step": 6057, "epoch": 3 }, { "type": "loss", "content": 0.0016620157985016704, "timestamp": "2025-09-10 02:32:08.035567", "step": 6058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.064407", "step": 6058, "epoch": 3 }, { "type": "loss", "content": 0.00020325240620877594, "timestamp": "2025-09-10 02:32:08.066196", "step": 6059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.095052", "step": 6059, "epoch": 3 }, { "type": "loss", "content": 0.0015133811393752694, "timestamp": "2025-09-10 02:32:08.118275", "step": 6060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.147581", "step": 6060, "epoch": 3 }, { "type": "loss", "content": 0.00016656095976941288, "timestamp": "2025-09-10 02:32:08.149624", "step": 6061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.179411", "step": 6061, "epoch": 3 }, { "type": "loss", "content": 0.0001438201143173501, "timestamp": "2025-09-10 02:32:08.181391", "step": 6062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.210419", "step": 6062, "epoch": 3 }, { "type": "loss", "content": 0.0006453626556321979, "timestamp": "2025-09-10 02:32:08.212326", "step": 6063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.241145", "step": 6063, "epoch": 3 }, { "type": "loss", "content": 0.00026031830930151045, "timestamp": "2025-09-10 02:32:08.264551", "step": 6064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.293793", "step": 6064, "epoch": 3 }, { "type": "loss", "content": 0.004452974535524845, "timestamp": "2025-09-10 02:32:08.295904", "step": 6065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:08.325015", "step": 6065, "epoch": 3 }, { "type": "loss", "content": 0.0009710406302474439, "timestamp": "2025-09-10 02:32:08.326915", "step": 6066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:08.356192", "step": 6066, "epoch": 3 }, { "type": "loss", "content": 0.00012414071534294635, "timestamp": "2025-09-10 02:32:08.357959", "step": 6067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.386624", "step": 6067, "epoch": 3 }, { "type": "loss", "content": 0.02108047343790531, "timestamp": "2025-09-10 02:32:08.410156", "step": 6068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.439257", "step": 6068, "epoch": 3 }, { "type": "loss", "content": 0.0010208667954429984, "timestamp": "2025-09-10 02:32:08.441119", "step": 6069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:08.469891", "step": 6069, "epoch": 3 }, { "type": "loss", "content": 0.0005744010559283197, "timestamp": "2025-09-10 02:32:08.471673", "step": 6070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.500236", "step": 6070, "epoch": 3 }, { "type": "loss", "content": 0.0001902828662423417, "timestamp": "2025-09-10 02:32:08.502083", "step": 6071, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.531175", "step": 6071, "epoch": 3 }, { "type": "loss", "content": 0.0005153705715201795, "timestamp": "2025-09-10 02:32:08.554663", "step": 6072, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.586075", "step": 6072, "epoch": 3 }, { "type": "loss", "content": 0.00047756050480529666, "timestamp": "2025-09-10 02:32:08.587878", "step": 6073, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.617095", "step": 6073, "epoch": 3 }, { "type": "loss", "content": 0.0004439638287294656, "timestamp": "2025-09-10 02:32:08.619091", "step": 6074, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.647926", "step": 6074, "epoch": 3 }, { "type": "loss", "content": 0.009202768094837666, "timestamp": "2025-09-10 02:32:08.649672", "step": 6075, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.679044", "step": 6075, "epoch": 3 }, { "type": "loss", "content": 0.001354446285404265, "timestamp": "2025-09-10 02:32:08.702500", "step": 6076, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.731702", "step": 6076, "epoch": 3 }, { "type": "loss", "content": 0.004026635084301233, "timestamp": "2025-09-10 02:32:08.733674", "step": 6077, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.762480", "step": 6077, "epoch": 3 }, { "type": "loss", "content": 0.0003100039029959589, "timestamp": "2025-09-10 02:32:08.764300", "step": 6078, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:08.793548", "step": 6078, "epoch": 3 }, { "type": "loss", "content": 0.00024007308820728213, "timestamp": "2025-09-10 02:32:08.795628", "step": 6079, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:08.824959", "step": 6079, "epoch": 3 }, { "type": "loss", "content": 0.0005758529296144843, "timestamp": "2025-09-10 02:32:08.848332", "step": 6080, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:32:10.722930", "step": 6080, "epoch": 3 }, { "type": "pplx", "content": 2645955.2737260633, "timestamp": "2025-09-10 02:32:10.724976", "step": 6080, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.752615", "step": 6080, "epoch": 3 }, { "type": "loss", "content": 0.0002218651061411947, "timestamp": "2025-09-10 02:32:10.754453", "step": 6081, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.785046", "step": 6081, "epoch": 3 }, { "type": "loss", "content": 0.0007585693383589387, "timestamp": "2025-09-10 02:32:10.786867", "step": 6082, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.815534", "step": 6082, "epoch": 3 }, { "type": "loss", "content": 0.0015208182157948613, "timestamp": "2025-09-10 02:32:10.817443", "step": 6083, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.846799", "step": 6083, "epoch": 3 }, { "type": "loss", "content": 0.0015198250766843557, "timestamp": "2025-09-10 02:32:10.870122", "step": 6084, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.899263", "step": 6084, "epoch": 3 }, { "type": "loss", "content": 0.0004692175716627389, "timestamp": "2025-09-10 02:32:10.901106", "step": 6085, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.929635", "step": 6085, "epoch": 3 }, { "type": "loss", "content": 0.0025713664945214987, "timestamp": "2025-09-10 02:32:10.931133", "step": 6086, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.960013", "step": 6086, "epoch": 3 }, { "type": "loss", "content": 0.001673850929364562, "timestamp": "2025-09-10 02:32:10.961753", "step": 6087, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:10.990391", "step": 6087, "epoch": 3 }, { "type": "loss", "content": 0.0027655635494738817, "timestamp": "2025-09-10 02:32:11.013827", "step": 6088, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.043082", "step": 6088, "epoch": 3 }, { "type": "loss", "content": 0.00013074224989395589, "timestamp": "2025-09-10 02:32:11.045104", "step": 6089, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.074067", "step": 6089, "epoch": 3 }, { "type": "loss", "content": 0.026914628222584724, "timestamp": "2025-09-10 02:32:11.075879", "step": 6090, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:11.104850", "step": 6090, "epoch": 3 }, { "type": "loss", "content": 0.0005443138652481139, "timestamp": "2025-09-10 02:32:11.106356", "step": 6091, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.135142", "step": 6091, "epoch": 3 }, { "type": "loss", "content": 0.00029482325771823525, "timestamp": "2025-09-10 02:32:11.158546", "step": 6092, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.188627", "step": 6092, "epoch": 3 }, { "type": "loss", "content": 0.0010598287917673588, "timestamp": "2025-09-10 02:32:11.190415", "step": 6093, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.221662", "step": 6093, "epoch": 3 }, { "type": "loss", "content": 0.015348801389336586, "timestamp": "2025-09-10 02:32:11.223412", "step": 6094, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.256660", "step": 6094, "epoch": 3 }, { "type": "loss", "content": 0.00018287813873030245, "timestamp": "2025-09-10 02:32:11.258924", "step": 6095, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:11.287545", "step": 6095, "epoch": 3 }, { "type": "loss", "content": 0.0003591575659811497, "timestamp": "2025-09-10 02:32:11.310904", "step": 6096, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.342593", "step": 6096, "epoch": 3 }, { "type": "loss", "content": 0.0528402216732502, "timestamp": "2025-09-10 02:32:11.344587", "step": 6097, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.375959", "step": 6097, "epoch": 3 }, { "type": "loss", "content": 0.006955190096050501, "timestamp": "2025-09-10 02:32:11.377858", "step": 6098, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.411720", "step": 6098, "epoch": 3 }, { "type": "loss", "content": 0.0035176179371774197, "timestamp": "2025-09-10 02:32:11.413503", "step": 6099, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.445858", "step": 6099, "epoch": 3 }, { "type": "loss", "content": 0.015753187239170074, "timestamp": "2025-09-10 02:32:11.469280", "step": 6100, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.505086", "step": 6100, "epoch": 3 }, { "type": "loss", "content": 0.0005709449178539217, "timestamp": "2025-09-10 02:32:11.507174", "step": 6101, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.538614", "step": 6101, "epoch": 3 }, { "type": "loss", "content": 0.0005121551221236587, "timestamp": "2025-09-10 02:32:11.540708", "step": 6102, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.575806", "step": 6102, "epoch": 3 }, { "type": "loss", "content": 0.003526447806507349, "timestamp": "2025-09-10 02:32:11.577696", "step": 6103, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.609683", "step": 6103, "epoch": 3 }, { "type": "loss", "content": 0.00563672324642539, "timestamp": "2025-09-10 02:32:11.633388", "step": 6104, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:11.664181", "step": 6104, "epoch": 3 }, { "type": "loss", "content": 0.034490954130887985, "timestamp": "2025-09-10 02:32:11.665943", "step": 6105, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.703553", "step": 6105, "epoch": 3 }, { "type": "loss", "content": 0.0005361035582609475, "timestamp": "2025-09-10 02:32:11.705535", "step": 6106, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.743083", "step": 6106, "epoch": 3 }, { "type": "loss", "content": 0.00022824991901870817, "timestamp": "2025-09-10 02:32:11.744860", "step": 6107, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.778747", "step": 6107, "epoch": 3 }, { "type": "loss", "content": 0.00033912801882252097, "timestamp": "2025-09-10 02:32:11.802065", "step": 6108, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:11.831090", "step": 6108, "epoch": 3 }, { "type": "loss", "content": 0.0004516389162745327, "timestamp": "2025-09-10 02:32:11.832796", "step": 6109, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.861531", "step": 6109, "epoch": 3 }, { "type": "loss", "content": 0.0016965868417173624, "timestamp": "2025-09-10 02:32:11.863190", "step": 6110, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:11.892382", "step": 6110, "epoch": 3 }, { "type": "loss", "content": 0.0002465526922605932, "timestamp": "2025-09-10 02:32:11.893978", "step": 6111, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.923297", "step": 6111, "epoch": 3 }, { "type": "loss", "content": 0.0005807014531455934, "timestamp": "2025-09-10 02:32:11.946521", "step": 6112, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:11.975691", "step": 6112, "epoch": 3 }, { "type": "loss", "content": 0.000393257214454934, "timestamp": "2025-09-10 02:32:11.977596", "step": 6113, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.006656", "step": 6113, "epoch": 3 }, { "type": "loss", "content": 0.039987385272979736, "timestamp": "2025-09-10 02:32:12.008545", "step": 6114, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.037688", "step": 6114, "epoch": 3 }, { "type": "loss", "content": 0.0008695316500961781, "timestamp": "2025-09-10 02:32:12.039677", "step": 6115, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.068398", "step": 6115, "epoch": 3 }, { "type": "loss", "content": 0.010479303076863289, "timestamp": "2025-09-10 02:32:12.091893", "step": 6116, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.121026", "step": 6116, "epoch": 3 }, { "type": "loss", "content": 0.0006460921722464263, "timestamp": "2025-09-10 02:32:12.123141", "step": 6117, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.152401", "step": 6117, "epoch": 3 }, { "type": "loss", "content": 0.01761404611170292, "timestamp": "2025-09-10 02:32:12.154498", "step": 6118, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:12.183639", "step": 6118, "epoch": 3 }, { "type": "loss", "content": 0.0028172603342682123, "timestamp": "2025-09-10 02:32:12.185574", "step": 6119, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:12.214910", "step": 6119, "epoch": 3 }, { "type": "loss", "content": 0.0009161824127659202, "timestamp": "2025-09-10 02:32:12.238248", "step": 6120, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:12.267956", "step": 6120, "epoch": 3 }, { "type": "loss", "content": 0.000278738618362695, "timestamp": "2025-09-10 02:32:12.270159", "step": 6121, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.299252", "step": 6121, "epoch": 3 }, { "type": "loss", "content": 0.0011909457389265299, "timestamp": "2025-09-10 02:32:12.301256", "step": 6122, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.330692", "step": 6122, "epoch": 3 }, { "type": "loss", "content": 0.0005585025646723807, "timestamp": "2025-09-10 02:32:12.332580", "step": 6123, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.361637", "step": 6123, "epoch": 3 }, { "type": "loss", "content": 0.0004416074953041971, "timestamp": "2025-09-10 02:32:12.384629", "step": 6124, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.413611", "step": 6124, "epoch": 3 }, { "type": "loss", "content": 0.03923536464571953, "timestamp": "2025-09-10 02:32:12.415210", "step": 6125, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.444017", "step": 6125, "epoch": 3 }, { "type": "loss", "content": 0.0012181001948192716, "timestamp": "2025-09-10 02:32:12.445942", "step": 6126, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.475024", "step": 6126, "epoch": 3 }, { "type": "loss", "content": 0.0029334353748708963, "timestamp": "2025-09-10 02:32:12.476755", "step": 6127, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.505343", "step": 6127, "epoch": 3 }, { "type": "loss", "content": 0.009986157529056072, "timestamp": "2025-09-10 02:32:12.528831", "step": 6128, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.557868", "step": 6128, "epoch": 3 }, { "type": "loss", "content": 0.021301427856087685, "timestamp": "2025-09-10 02:32:12.559807", "step": 6129, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.588302", "step": 6129, "epoch": 3 }, { "type": "loss", "content": 0.0011228956282138824, "timestamp": "2025-09-10 02:32:12.590059", "step": 6130, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.618629", "step": 6130, "epoch": 3 }, { "type": "loss", "content": 0.000370693946024403, "timestamp": "2025-09-10 02:32:12.620467", "step": 6131, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.649412", "step": 6131, "epoch": 3 }, { "type": "loss", "content": 0.0028962441720068455, "timestamp": "2025-09-10 02:32:12.672855", "step": 6132, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.701800", "step": 6132, "epoch": 3 }, { "type": "loss", "content": 0.00017247784126084298, "timestamp": "2025-09-10 02:32:12.703995", "step": 6133, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:12.732710", "step": 6133, "epoch": 3 }, { "type": "loss", "content": 0.0002645184868015349, "timestamp": "2025-09-10 02:32:12.735901", "step": 6134, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.767336", "step": 6134, "epoch": 3 }, { "type": "loss", "content": 0.0008119445410557091, "timestamp": "2025-09-10 02:32:12.769082", "step": 6135, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.802533", "step": 6135, "epoch": 3 }, { "type": "loss", "content": 0.0010705316672101617, "timestamp": "2025-09-10 02:32:12.825864", "step": 6136, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.854767", "step": 6136, "epoch": 3 }, { "type": "loss", "content": 0.007430485915392637, "timestamp": "2025-09-10 02:32:12.856426", "step": 6137, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.885400", "step": 6137, "epoch": 3 }, { "type": "loss", "content": 0.017648329958319664, "timestamp": "2025-09-10 02:32:12.887352", "step": 6138, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:12.916699", "step": 6138, "epoch": 3 }, { "type": "loss", "content": 0.0019228752935305238, "timestamp": "2025-09-10 02:32:12.918559", "step": 6139, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.947597", "step": 6139, "epoch": 3 }, { "type": "loss", "content": 0.0014111108612269163, "timestamp": "2025-09-10 02:32:12.971073", "step": 6140, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:12.999730", "step": 6140, "epoch": 3 }, { "type": "loss", "content": 0.0016826939536258578, "timestamp": "2025-09-10 02:32:13.002179", "step": 6141, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.032704", "step": 6141, "epoch": 3 }, { "type": "loss", "content": 0.0007334706024266779, "timestamp": "2025-09-10 02:32:13.034532", "step": 6142, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.063618", "step": 6142, "epoch": 3 }, { "type": "loss", "content": 0.00017897525685839355, "timestamp": "2025-09-10 02:32:13.065570", "step": 6143, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.094353", "step": 6143, "epoch": 3 }, { "type": "loss", "content": 0.0010283568408340216, "timestamp": "2025-09-10 02:32:13.117707", "step": 6144, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.147305", "step": 6144, "epoch": 3 }, { "type": "loss", "content": 0.0006188590778037906, "timestamp": "2025-09-10 02:32:13.149186", "step": 6145, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.180082", "step": 6145, "epoch": 3 }, { "type": "loss", "content": 0.0004881804343312979, "timestamp": "2025-09-10 02:32:13.181820", "step": 6146, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:13.214603", "step": 6146, "epoch": 3 }, { "type": "loss", "content": 0.000541047949809581, "timestamp": "2025-09-10 02:32:13.216744", "step": 6147, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.251832", "step": 6147, "epoch": 3 }, { "type": "loss", "content": 0.005712251644581556, "timestamp": "2025-09-10 02:32:13.275138", "step": 6148, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.304284", "step": 6148, "epoch": 3 }, { "type": "loss", "content": 0.001517638680525124, "timestamp": "2025-09-10 02:32:13.306203", "step": 6149, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.336477", "step": 6149, "epoch": 3 }, { "type": "loss", "content": 0.0017551190685480833, "timestamp": "2025-09-10 02:32:13.338432", "step": 6150, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.368942", "step": 6150, "epoch": 3 }, { "type": "loss", "content": 0.0005528161418624222, "timestamp": "2025-09-10 02:32:13.370911", "step": 6151, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.404658", "step": 6151, "epoch": 3 }, { "type": "loss", "content": 0.000431155989645049, "timestamp": "2025-09-10 02:32:13.428213", "step": 6152, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:13.457744", "step": 6152, "epoch": 3 }, { "type": "loss", "content": 0.010887889191508293, "timestamp": "2025-09-10 02:32:13.459780", "step": 6153, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.496303", "step": 6153, "epoch": 3 }, { "type": "loss", "content": 0.004278097301721573, "timestamp": "2025-09-10 02:32:13.498321", "step": 6154, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:13.531496", "step": 6154, "epoch": 3 }, { "type": "loss", "content": 0.010864503681659698, "timestamp": "2025-09-10 02:32:13.533306", "step": 6155, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:13.566594", "step": 6155, "epoch": 3 }, { "type": "loss", "content": 0.004223777912557125, "timestamp": "2025-09-10 02:32:13.590021", "step": 6156, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.621893", "step": 6156, "epoch": 3 }, { "type": "loss", "content": 0.00029171284404583275, "timestamp": "2025-09-10 02:32:13.623788", "step": 6157, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.657131", "step": 6157, "epoch": 3 }, { "type": "loss", "content": 0.0011619922006502748, "timestamp": "2025-09-10 02:32:13.659086", "step": 6158, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.698307", "step": 6158, "epoch": 3 }, { "type": "loss", "content": 0.0005122654838487506, "timestamp": "2025-09-10 02:32:13.700086", "step": 6159, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.738298", "step": 6159, "epoch": 3 }, { "type": "loss", "content": 0.00033908014302141964, "timestamp": "2025-09-10 02:32:13.762018", "step": 6160, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.798997", "step": 6160, "epoch": 3 }, { "type": "loss", "content": 0.02243952453136444, "timestamp": "2025-09-10 02:32:13.801122", "step": 6161, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:13.830500", "step": 6161, "epoch": 3 }, { "type": "loss", "content": 0.007921956479549408, "timestamp": "2025-09-10 02:32:13.833682", "step": 6162, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:13.865096", "step": 6162, "epoch": 3 }, { "type": "loss", "content": 0.0003726438444573432, "timestamp": "2025-09-10 02:32:13.867165", "step": 6163, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.895969", "step": 6163, "epoch": 3 }, { "type": "loss", "content": 0.00021600746549665928, "timestamp": "2025-09-10 02:32:13.919516", "step": 6164, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:13.949089", "step": 6164, "epoch": 3 }, { "type": "loss", "content": 0.00013112745364196599, "timestamp": "2025-09-10 02:32:13.951125", "step": 6165, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:13.980350", "step": 6165, "epoch": 3 }, { "type": "loss", "content": 0.00412676902487874, "timestamp": "2025-09-10 02:32:13.982241", "step": 6166, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.011304", "step": 6166, "epoch": 3 }, { "type": "loss", "content": 0.004285149276256561, "timestamp": "2025-09-10 02:32:14.013358", "step": 6167, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:14.042800", "step": 6167, "epoch": 3 }, { "type": "loss", "content": 0.0004469568666536361, "timestamp": "2025-09-10 02:32:14.066299", "step": 6168, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.096145", "step": 6168, "epoch": 3 }, { "type": "loss", "content": 0.0006515326676890254, "timestamp": "2025-09-10 02:32:14.098098", "step": 6169, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:14.127159", "step": 6169, "epoch": 3 }, { "type": "loss", "content": 0.0013496106257662177, "timestamp": "2025-09-10 02:32:14.128814", "step": 6170, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.158335", "step": 6170, "epoch": 3 }, { "type": "loss", "content": 0.0009292674367316067, "timestamp": "2025-09-10 02:32:14.160227", "step": 6171, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:14.189352", "step": 6171, "epoch": 3 }, { "type": "loss", "content": 0.0011377623304724693, "timestamp": "2025-09-10 02:32:14.212897", "step": 6172, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.242531", "step": 6172, "epoch": 3 }, { "type": "loss", "content": 0.01203919854015112, "timestamp": "2025-09-10 02:32:14.244487", "step": 6173, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.273885", "step": 6173, "epoch": 3 }, { "type": "loss", "content": 0.00012295367196202278, "timestamp": "2025-09-10 02:32:14.275587", "step": 6174, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.304489", "step": 6174, "epoch": 3 }, { "type": "loss", "content": 0.00014454529446084052, "timestamp": "2025-09-10 02:32:14.306216", "step": 6175, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:14.335291", "step": 6175, "epoch": 3 }, { "type": "loss", "content": 0.00014045732677914202, "timestamp": "2025-09-10 02:32:14.358860", "step": 6176, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.388401", "step": 6176, "epoch": 3 }, { "type": "loss", "content": 0.00039911657222546637, "timestamp": "2025-09-10 02:32:14.390230", "step": 6177, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.420161", "step": 6177, "epoch": 3 }, { "type": "loss", "content": 0.0013413883280009031, "timestamp": "2025-09-10 02:32:14.422011", "step": 6178, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.451483", "step": 6178, "epoch": 3 }, { "type": "loss", "content": 0.00018301047384738922, "timestamp": "2025-09-10 02:32:14.453476", "step": 6179, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:14.482436", "step": 6179, "epoch": 3 }, { "type": "loss", "content": 0.0018863353179767728, "timestamp": "2025-09-10 02:32:14.505809", "step": 6180, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:14.535443", "step": 6180, "epoch": 3 }, { "type": "loss", "content": 0.00019373593386262655, "timestamp": "2025-09-10 02:32:14.537403", "step": 6181, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.566887", "step": 6181, "epoch": 3 }, { "type": "loss", "content": 0.03961193934082985, "timestamp": "2025-09-10 02:32:14.568823", "step": 6182, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.598044", "step": 6182, "epoch": 3 }, { "type": "loss", "content": 0.005647978745400906, "timestamp": "2025-09-10 02:32:14.599915", "step": 6183, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:14.628906", "step": 6183, "epoch": 3 }, { "type": "loss", "content": 0.00017677816504146904, "timestamp": "2025-09-10 02:32:14.652400", "step": 6184, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:14.681444", "step": 6184, "epoch": 3 }, { "type": "loss", "content": 0.0004426330851856619, "timestamp": "2025-09-10 02:32:14.683263", "step": 6185, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.712685", "step": 6185, "epoch": 3 }, { "type": "loss", "content": 0.00012685095134656876, "timestamp": "2025-09-10 02:32:14.714665", "step": 6186, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.744367", "step": 6186, "epoch": 3 }, { "type": "loss", "content": 0.00031306553864851594, "timestamp": "2025-09-10 02:32:14.746523", "step": 6187, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:14.778144", "step": 6187, "epoch": 3 }, { "type": "loss", "content": 0.0031656906940042973, "timestamp": "2025-09-10 02:32:14.801685", "step": 6188, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.831314", "step": 6188, "epoch": 3 }, { "type": "loss", "content": 0.00013748157653026283, "timestamp": "2025-09-10 02:32:14.833424", "step": 6189, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.863116", "step": 6189, "epoch": 3 }, { "type": "loss", "content": 8.928526222007349e-05, "timestamp": "2025-09-10 02:32:14.865332", "step": 6190, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:14.895082", "step": 6190, "epoch": 3 }, { "type": "loss", "content": 9.361335105495527e-05, "timestamp": "2025-09-10 02:32:14.897188", "step": 6191, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.926467", "step": 6191, "epoch": 3 }, { "type": "loss", "content": 0.00012292077008169144, "timestamp": "2025-09-10 02:32:14.949934", "step": 6192, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:14.979431", "step": 6192, "epoch": 3 }, { "type": "loss", "content": 0.01482431497424841, "timestamp": "2025-09-10 02:32:14.981328", "step": 6193, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.010231", "step": 6193, "epoch": 3 }, { "type": "loss", "content": 0.006716754753142595, "timestamp": "2025-09-10 02:32:15.012208", "step": 6194, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:15.041545", "step": 6194, "epoch": 3 }, { "type": "loss", "content": 0.03301231190562248, "timestamp": "2025-09-10 02:32:15.043380", "step": 6195, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.072426", "step": 6195, "epoch": 3 }, { "type": "loss", "content": 0.008143181912600994, "timestamp": "2025-09-10 02:32:15.095873", "step": 6196, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.125315", "step": 6196, "epoch": 3 }, { "type": "loss", "content": 0.00032031707814894617, "timestamp": "2025-09-10 02:32:15.127316", "step": 6197, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.157348", "step": 6197, "epoch": 3 }, { "type": "loss", "content": 0.0015913124661892653, "timestamp": "2025-09-10 02:32:15.159360", "step": 6198, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.190009", "step": 6198, "epoch": 3 }, { "type": "loss", "content": 9.639223571866751e-05, "timestamp": "2025-09-10 02:32:15.192145", "step": 6199, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:15.224192", "step": 6199, "epoch": 3 }, { "type": "loss", "content": 7.586956053273752e-05, "timestamp": "2025-09-10 02:32:15.247859", "step": 6200, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.278900", "step": 6200, "epoch": 3 }, { "type": "loss", "content": 0.00031549923005513847, "timestamp": "2025-09-10 02:32:15.280748", "step": 6201, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.310097", "step": 6201, "epoch": 3 }, { "type": "loss", "content": 0.0003611970169004053, "timestamp": "2025-09-10 02:32:15.311838", "step": 6202, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.343670", "step": 6202, "epoch": 3 }, { "type": "loss", "content": 0.012886380776762962, "timestamp": "2025-09-10 02:32:15.345565", "step": 6203, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.376797", "step": 6203, "epoch": 3 }, { "type": "loss", "content": 7.04435515217483e-05, "timestamp": "2025-09-10 02:32:15.400396", "step": 6204, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.436695", "step": 6204, "epoch": 3 }, { "type": "loss", "content": 0.00022571615409106016, "timestamp": "2025-09-10 02:32:15.438755", "step": 6205, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:15.472458", "step": 6205, "epoch": 3 }, { "type": "loss", "content": 7.774594268994406e-05, "timestamp": "2025-09-10 02:32:15.474265", "step": 6206, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.509034", "step": 6206, "epoch": 3 }, { "type": "loss", "content": 5.046996375313029e-05, "timestamp": "2025-09-10 02:32:15.510900", "step": 6207, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.540835", "step": 6207, "epoch": 3 }, { "type": "loss", "content": 0.00037210204754956067, "timestamp": "2025-09-10 02:32:15.564471", "step": 6208, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.600615", "step": 6208, "epoch": 3 }, { "type": "loss", "content": 0.0005679802852682769, "timestamp": "2025-09-10 02:32:15.602653", "step": 6209, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:15.636276", "step": 6209, "epoch": 3 }, { "type": "loss", "content": 0.0002304376830579713, "timestamp": "2025-09-10 02:32:15.638117", "step": 6210, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.671371", "step": 6210, "epoch": 3 }, { "type": "loss", "content": 0.004458740819245577, "timestamp": "2025-09-10 02:32:15.673673", "step": 6211, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:15.711415", "step": 6211, "epoch": 3 }, { "type": "loss", "content": 8.715118747204542e-05, "timestamp": "2025-09-10 02:32:15.734981", "step": 6212, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.771670", "step": 6212, "epoch": 3 }, { "type": "loss", "content": 0.00011552788782864809, "timestamp": "2025-09-10 02:32:15.773987", "step": 6213, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.812433", "step": 6213, "epoch": 3 }, { "type": "loss", "content": 0.0019429840613156557, "timestamp": "2025-09-10 02:32:15.814448", "step": 6214, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.844191", "step": 6214, "epoch": 3 }, { "type": "loss", "content": 0.002169437939301133, "timestamp": "2025-09-10 02:32:15.846212", "step": 6215, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:15.875748", "step": 6215, "epoch": 3 }, { "type": "loss", "content": 0.017387619242072105, "timestamp": "2025-09-10 02:32:15.900170", "step": 6216, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.929568", "step": 6216, "epoch": 3 }, { "type": "loss", "content": 0.0425243116915226, "timestamp": "2025-09-10 02:32:15.931524", "step": 6217, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.960536", "step": 6217, "epoch": 3 }, { "type": "loss", "content": 0.04513928294181824, "timestamp": "2025-09-10 02:32:15.962697", "step": 6218, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:15.992056", "step": 6218, "epoch": 3 }, { "type": "loss", "content": 0.0001454920566175133, "timestamp": "2025-09-10 02:32:15.994231", "step": 6219, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.023619", "step": 6219, "epoch": 3 }, { "type": "loss", "content": 0.00026676151901483536, "timestamp": "2025-09-10 02:32:16.047266", "step": 6220, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.077588", "step": 6220, "epoch": 3 }, { "type": "loss", "content": 0.01327372808009386, "timestamp": "2025-09-10 02:32:16.079322", "step": 6221, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:16.108561", "step": 6221, "epoch": 3 }, { "type": "loss", "content": 7.856685988372192e-05, "timestamp": "2025-09-10 02:32:16.110478", "step": 6222, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.140122", "step": 6222, "epoch": 3 }, { "type": "loss", "content": 0.0002478799724485725, "timestamp": "2025-09-10 02:32:16.141926", "step": 6223, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.170628", "step": 6223, "epoch": 3 }, { "type": "loss", "content": 0.0002998611016664654, "timestamp": "2025-09-10 02:32:16.193940", "step": 6224, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:16.223877", "step": 6224, "epoch": 3 }, { "type": "loss", "content": 0.00018482895393390208, "timestamp": "2025-09-10 02:32:16.225816", "step": 6225, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.256063", "step": 6225, "epoch": 3 }, { "type": "loss", "content": 9.805053559830412e-05, "timestamp": "2025-09-10 02:32:16.257926", "step": 6226, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.288360", "step": 6226, "epoch": 3 }, { "type": "loss", "content": 0.00033532059751451015, "timestamp": "2025-09-10 02:32:16.290101", "step": 6227, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.322326", "step": 6227, "epoch": 3 }, { "type": "loss", "content": 0.00011088912287959829, "timestamp": "2025-09-10 02:32:16.345707", "step": 6228, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:16.378762", "step": 6228, "epoch": 3 }, { "type": "loss", "content": 0.00028449486126191914, "timestamp": "2025-09-10 02:32:16.380436", "step": 6229, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.409194", "step": 6229, "epoch": 3 }, { "type": "loss", "content": 0.001823748811148107, "timestamp": "2025-09-10 02:32:16.410931", "step": 6230, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:16.439871", "step": 6230, "epoch": 3 }, { "type": "loss", "content": 0.00012009156489511952, "timestamp": "2025-09-10 02:32:16.441630", "step": 6231, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:16.472398", "step": 6231, "epoch": 3 }, { "type": "loss", "content": 0.00025651781470514834, "timestamp": "2025-09-10 02:32:16.495813", "step": 6232, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:32:18.431282", "step": 6232, "epoch": 3 }, { "type": "pplx", "content": 2606747.3698908505, "timestamp": "2025-09-10 02:32:18.434868", "step": 6232, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.463633", "step": 6232, "epoch": 3 }, { "type": "loss", "content": 0.02282080054283142, "timestamp": "2025-09-10 02:32:18.465407", "step": 6233, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:18.494958", "step": 6233, "epoch": 3 }, { "type": "loss", "content": 0.00016311134095303714, "timestamp": "2025-09-10 02:32:18.496931", "step": 6234, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:18.526297", "step": 6234, "epoch": 3 }, { "type": "loss", "content": 0.000255206337897107, "timestamp": "2025-09-10 02:32:18.528192", "step": 6235, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:18.557708", "step": 6235, "epoch": 3 }, { "type": "loss", "content": 8.574249659432098e-05, "timestamp": "2025-09-10 02:32:18.581180", "step": 6236, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.610717", "step": 6236, "epoch": 3 }, { "type": "loss", "content": 0.00012947633513249457, "timestamp": "2025-09-10 02:32:18.612895", "step": 6237, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.642857", "step": 6237, "epoch": 3 }, { "type": "loss", "content": 0.04599553719162941, "timestamp": "2025-09-10 02:32:18.644925", "step": 6238, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:18.674260", "step": 6238, "epoch": 3 }, { "type": "loss", "content": 6.865202158223838e-05, "timestamp": "2025-09-10 02:32:18.676093", "step": 6239, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.705420", "step": 6239, "epoch": 3 }, { "type": "loss", "content": 9.582000348018482e-05, "timestamp": "2025-09-10 02:32:18.728882", "step": 6240, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.758203", "step": 6240, "epoch": 3 }, { "type": "loss", "content": 0.0253781545907259, "timestamp": "2025-09-10 02:32:18.760079", "step": 6241, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.792731", "step": 6241, "epoch": 3 }, { "type": "loss", "content": 0.0001324364129686728, "timestamp": "2025-09-10 02:32:18.794693", "step": 6242, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:18.823912", "step": 6242, "epoch": 3 }, { "type": "loss", "content": 0.00013522346853278577, "timestamp": "2025-09-10 02:32:18.825828", "step": 6243, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.855729", "step": 6243, "epoch": 3 }, { "type": "loss", "content": 0.00027700577629730105, "timestamp": "2025-09-10 02:32:18.879022", "step": 6244, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.908380", "step": 6244, "epoch": 3 }, { "type": "loss", "content": 0.0021711511071771383, "timestamp": "2025-09-10 02:32:18.910263", "step": 6245, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.939005", "step": 6245, "epoch": 3 }, { "type": "loss", "content": 0.0015461508883163333, "timestamp": "2025-09-10 02:32:18.940797", "step": 6246, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:18.970112", "step": 6246, "epoch": 3 }, { "type": "loss", "content": 0.00022470230760518461, "timestamp": "2025-09-10 02:32:18.972104", "step": 6247, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.001210", "step": 6247, "epoch": 3 }, { "type": "loss", "content": 0.000954842078499496, "timestamp": "2025-09-10 02:32:19.024809", "step": 6248, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.054968", "step": 6248, "epoch": 3 }, { "type": "loss", "content": 0.00021177942107897252, "timestamp": "2025-09-10 02:32:19.056788", "step": 6249, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.085634", "step": 6249, "epoch": 3 }, { "type": "loss", "content": 0.008807296864688396, "timestamp": "2025-09-10 02:32:19.087451", "step": 6250, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.116854", "step": 6250, "epoch": 3 }, { "type": "loss", "content": 0.00024041572760324925, "timestamp": "2025-09-10 02:32:19.118606", "step": 6251, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:19.147863", "step": 6251, "epoch": 3 }, { "type": "loss", "content": 0.03950396180152893, "timestamp": "2025-09-10 02:32:19.171237", "step": 6252, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.201996", "step": 6252, "epoch": 3 }, { "type": "loss", "content": 0.0005115449312143028, "timestamp": "2025-09-10 02:32:19.203785", "step": 6253, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.236659", "step": 6253, "epoch": 3 }, { "type": "loss", "content": 0.005030186381191015, "timestamp": "2025-09-10 02:32:19.238428", "step": 6254, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.270585", "step": 6254, "epoch": 3 }, { "type": "loss", "content": 0.017237406224012375, "timestamp": "2025-09-10 02:32:19.272698", "step": 6255, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.302247", "step": 6255, "epoch": 3 }, { "type": "loss", "content": 0.0001371714606648311, "timestamp": "2025-09-10 02:32:19.326265", "step": 6256, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.357618", "step": 6256, "epoch": 3 }, { "type": "loss", "content": 0.0002689917164389044, "timestamp": "2025-09-10 02:32:19.359973", "step": 6257, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:19.390962", "step": 6257, "epoch": 3 }, { "type": "loss", "content": 0.00031374432728625834, "timestamp": "2025-09-10 02:32:19.393036", "step": 6258, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.428450", "step": 6258, "epoch": 3 }, { "type": "loss", "content": 0.0002451702021062374, "timestamp": "2025-09-10 02:32:19.430259", "step": 6259, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.461143", "step": 6259, "epoch": 3 }, { "type": "loss", "content": 0.00019412672554608434, "timestamp": "2025-09-10 02:32:19.484750", "step": 6260, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.517857", "step": 6260, "epoch": 3 }, { "type": "loss", "content": 0.00016561677330173552, "timestamp": "2025-09-10 02:32:19.519927", "step": 6261, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.553055", "step": 6261, "epoch": 3 }, { "type": "loss", "content": 8.579413406550884e-05, "timestamp": "2025-09-10 02:32:19.555146", "step": 6262, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.591614", "step": 6262, "epoch": 3 }, { "type": "loss", "content": 0.00016297269030474126, "timestamp": "2025-09-10 02:32:19.593356", "step": 6263, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.625978", "step": 6263, "epoch": 3 }, { "type": "loss", "content": 0.0018586188089102507, "timestamp": "2025-09-10 02:32:19.649378", "step": 6264, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.687636", "step": 6264, "epoch": 3 }, { "type": "loss", "content": 0.00047484468086622655, "timestamp": "2025-09-10 02:32:19.689473", "step": 6265, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.727203", "step": 6265, "epoch": 3 }, { "type": "loss", "content": 0.00035110226599499583, "timestamp": "2025-09-10 02:32:19.729145", "step": 6266, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.764740", "step": 6266, "epoch": 3 }, { "type": "loss", "content": 0.00018414246733300388, "timestamp": "2025-09-10 02:32:19.766376", "step": 6267, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:19.805016", "step": 6267, "epoch": 3 }, { "type": "loss", "content": 0.0004543558170553297, "timestamp": "2025-09-10 02:32:19.828573", "step": 6268, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.858293", "step": 6268, "epoch": 3 }, { "type": "loss", "content": 0.00028682383708655834, "timestamp": "2025-09-10 02:32:19.860282", "step": 6269, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.889809", "step": 6269, "epoch": 3 }, { "type": "loss", "content": 0.0007902501965872943, "timestamp": "2025-09-10 02:32:19.892018", "step": 6270, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.922637", "step": 6270, "epoch": 3 }, { "type": "loss", "content": 0.001329060411080718, "timestamp": "2025-09-10 02:32:19.924473", "step": 6271, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:19.954460", "step": 6271, "epoch": 3 }, { "type": "loss", "content": 9.7282012575306e-05, "timestamp": "2025-09-10 02:32:19.977960", "step": 6272, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.006771", "step": 6272, "epoch": 3 }, { "type": "loss", "content": 0.00014796505274716765, "timestamp": "2025-09-10 02:32:20.008334", "step": 6273, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:20.037605", "step": 6273, "epoch": 3 }, { "type": "loss", "content": 0.0030559967271983624, "timestamp": "2025-09-10 02:32:20.039555", "step": 6274, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.068523", "step": 6274, "epoch": 3 }, { "type": "loss", "content": 0.002250237390398979, "timestamp": "2025-09-10 02:32:20.070454", "step": 6275, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.099497", "step": 6275, "epoch": 3 }, { "type": "loss", "content": 0.000167002814123407, "timestamp": "2025-09-10 02:32:20.122883", "step": 6276, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.151636", "step": 6276, "epoch": 3 }, { "type": "loss", "content": 9.803228022065014e-05, "timestamp": "2025-09-10 02:32:20.153468", "step": 6277, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:20.182403", "step": 6277, "epoch": 3 }, { "type": "loss", "content": 0.0011862096143886447, "timestamp": "2025-09-10 02:32:20.184469", "step": 6278, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.213282", "step": 6278, "epoch": 3 }, { "type": "loss", "content": 0.00016611372120678425, "timestamp": "2025-09-10 02:32:20.215310", "step": 6279, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.244471", "step": 6279, "epoch": 3 }, { "type": "loss", "content": 0.0001127138311858289, "timestamp": "2025-09-10 02:32:20.267716", "step": 6280, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:20.297063", "step": 6280, "epoch": 3 }, { "type": "loss", "content": 0.004821238573640585, "timestamp": "2025-09-10 02:32:20.298897", "step": 6281, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.328250", "step": 6281, "epoch": 3 }, { "type": "loss", "content": 0.002264035167172551, "timestamp": "2025-09-10 02:32:20.330132", "step": 6282, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.359405", "step": 6282, "epoch": 3 }, { "type": "loss", "content": 0.00019882863853126764, "timestamp": "2025-09-10 02:32:20.361453", "step": 6283, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.391238", "step": 6283, "epoch": 3 }, { "type": "loss", "content": 0.0003595865855459124, "timestamp": "2025-09-10 02:32:20.414781", "step": 6284, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:20.444124", "step": 6284, "epoch": 3 }, { "type": "loss", "content": 0.00025777207338251173, "timestamp": "2025-09-10 02:32:20.446232", "step": 6285, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.475692", "step": 6285, "epoch": 3 }, { "type": "loss", "content": 0.011265160515904427, "timestamp": "2025-09-10 02:32:20.477574", "step": 6286, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.506551", "step": 6286, "epoch": 3 }, { "type": "loss", "content": 0.0018868193728849292, "timestamp": "2025-09-10 02:32:20.508366", "step": 6287, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.537179", "step": 6287, "epoch": 3 }, { "type": "loss", "content": 0.018199991434812546, "timestamp": "2025-09-10 02:32:20.560369", "step": 6288, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.590258", "step": 6288, "epoch": 3 }, { "type": "loss", "content": 0.0010011766571551561, "timestamp": "2025-09-10 02:32:20.592057", "step": 6289, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.621450", "step": 6289, "epoch": 3 }, { "type": "loss", "content": 0.0005134205566719174, "timestamp": "2025-09-10 02:32:20.623595", "step": 6290, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.652552", "step": 6290, "epoch": 3 }, { "type": "loss", "content": 0.0004429513937793672, "timestamp": "2025-09-10 02:32:20.654507", "step": 6291, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.683722", "step": 6291, "epoch": 3 }, { "type": "loss", "content": 0.00045281826169230044, "timestamp": "2025-09-10 02:32:20.707280", "step": 6292, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.736434", "step": 6292, "epoch": 3 }, { "type": "loss", "content": 0.00084385258378461, "timestamp": "2025-09-10 02:32:20.738389", "step": 6293, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.771882", "step": 6293, "epoch": 3 }, { "type": "loss", "content": 0.0003129442047793418, "timestamp": "2025-09-10 02:32:20.773844", "step": 6294, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:20.808669", "step": 6294, "epoch": 3 }, { "type": "loss", "content": 0.0003997218154836446, "timestamp": "2025-09-10 02:32:20.810523", "step": 6295, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.839583", "step": 6295, "epoch": 3 }, { "type": "loss", "content": 9.220842184731737e-05, "timestamp": "2025-09-10 02:32:20.863075", "step": 6296, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.892053", "step": 6296, "epoch": 3 }, { "type": "loss", "content": 0.013149378821253777, "timestamp": "2025-09-10 02:32:20.893771", "step": 6297, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.922321", "step": 6297, "epoch": 3 }, { "type": "loss", "content": 0.017743749544024467, "timestamp": "2025-09-10 02:32:20.924058", "step": 6298, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.952506", "step": 6298, "epoch": 3 }, { "type": "loss", "content": 0.002593550132587552, "timestamp": "2025-09-10 02:32:20.954584", "step": 6299, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:20.983919", "step": 6299, "epoch": 3 }, { "type": "loss", "content": 0.0023273208644241095, "timestamp": "2025-09-10 02:32:21.007316", "step": 6300, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.036427", "step": 6300, "epoch": 3 }, { "type": "loss", "content": 0.00046904286136850715, "timestamp": "2025-09-10 02:32:21.038125", "step": 6301, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:21.067227", "step": 6301, "epoch": 3 }, { "type": "loss", "content": 0.004067537374794483, "timestamp": "2025-09-10 02:32:21.069008", "step": 6302, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.097943", "step": 6302, "epoch": 3 }, { "type": "loss", "content": 0.00010718592966441065, "timestamp": "2025-09-10 02:32:21.101084", "step": 6303, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.132627", "step": 6303, "epoch": 3 }, { "type": "loss", "content": 0.00035416753962635994, "timestamp": "2025-09-10 02:32:21.156323", "step": 6304, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.187542", "step": 6304, "epoch": 3 }, { "type": "loss", "content": 0.0010335204424336553, "timestamp": "2025-09-10 02:32:21.189449", "step": 6305, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.220159", "step": 6305, "epoch": 3 }, { "type": "loss", "content": 0.00010592629405437037, "timestamp": "2025-09-10 02:32:21.221922", "step": 6306, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.255421", "step": 6306, "epoch": 3 }, { "type": "loss", "content": 0.0005984573508612812, "timestamp": "2025-09-10 02:32:21.257272", "step": 6307, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.286192", "step": 6307, "epoch": 3 }, { "type": "loss", "content": 0.0011838421924039721, "timestamp": "2025-09-10 02:32:21.309628", "step": 6308, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.340932", "step": 6308, "epoch": 3 }, { "type": "loss", "content": 0.00021382412523962557, "timestamp": "2025-09-10 02:32:21.342774", "step": 6309, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.372961", "step": 6309, "epoch": 3 }, { "type": "loss", "content": 4.185305442661047e-05, "timestamp": "2025-09-10 02:32:21.374703", "step": 6310, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.411039", "step": 6310, "epoch": 3 }, { "type": "loss", "content": 0.0213544312864542, "timestamp": "2025-09-10 02:32:21.412835", "step": 6311, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.444424", "step": 6311, "epoch": 3 }, { "type": "loss", "content": 0.002948646666482091, "timestamp": "2025-09-10 02:32:21.467921", "step": 6312, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:21.504900", "step": 6312, "epoch": 3 }, { "type": "loss", "content": 0.0010231381747871637, "timestamp": "2025-09-10 02:32:21.506604", "step": 6313, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.538686", "step": 6313, "epoch": 3 }, { "type": "loss", "content": 0.029510745778679848, "timestamp": "2025-09-10 02:32:21.540548", "step": 6314, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.575090", "step": 6314, "epoch": 3 }, { "type": "loss", "content": 0.00034681681427173316, "timestamp": "2025-09-10 02:32:21.576980", "step": 6315, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.611104", "step": 6315, "epoch": 3 }, { "type": "loss", "content": 0.0002904444409068674, "timestamp": "2025-09-10 02:32:21.634571", "step": 6316, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.665868", "step": 6316, "epoch": 3 }, { "type": "loss", "content": 0.0010635191574692726, "timestamp": "2025-09-10 02:32:21.667832", "step": 6317, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:21.707074", "step": 6317, "epoch": 3 }, { "type": "loss", "content": 0.0005607870989479125, "timestamp": "2025-09-10 02:32:21.708769", "step": 6318, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:21.748027", "step": 6318, "epoch": 3 }, { "type": "loss", "content": 8.25113311293535e-05, "timestamp": "2025-09-10 02:32:21.749965", "step": 6319, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.785144", "step": 6319, "epoch": 3 }, { "type": "loss", "content": 0.00039944160380400717, "timestamp": "2025-09-10 02:32:21.808445", "step": 6320, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.837056", "step": 6320, "epoch": 3 }, { "type": "loss", "content": 0.05035491660237312, "timestamp": "2025-09-10 02:32:21.838991", "step": 6321, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.867958", "step": 6321, "epoch": 3 }, { "type": "loss", "content": 0.003698610933497548, "timestamp": "2025-09-10 02:32:21.869704", "step": 6322, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.898316", "step": 6322, "epoch": 3 }, { "type": "loss", "content": 0.00028630904853343964, "timestamp": "2025-09-10 02:32:21.900190", "step": 6323, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.929464", "step": 6323, "epoch": 3 }, { "type": "loss", "content": 0.00029259143047966063, "timestamp": "2025-09-10 02:32:21.952906", "step": 6324, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:21.981991", "step": 6324, "epoch": 3 }, { "type": "loss", "content": 0.0008371643489226699, "timestamp": "2025-09-10 02:32:21.983841", "step": 6325, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.012484", "step": 6325, "epoch": 3 }, { "type": "loss", "content": 0.0013441009214147925, "timestamp": "2025-09-10 02:32:22.014228", "step": 6326, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.042981", "step": 6326, "epoch": 3 }, { "type": "loss", "content": 0.00048186283675022423, "timestamp": "2025-09-10 02:32:22.044753", "step": 6327, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:22.073713", "step": 6327, "epoch": 3 }, { "type": "loss", "content": 0.001302977092564106, "timestamp": "2025-09-10 02:32:22.097189", "step": 6328, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.126709", "step": 6328, "epoch": 3 }, { "type": "loss", "content": 0.0016445436049252748, "timestamp": "2025-09-10 02:32:22.128656", "step": 6329, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.157366", "step": 6329, "epoch": 3 }, { "type": "loss", "content": 0.09863261878490448, "timestamp": "2025-09-10 02:32:22.159195", "step": 6330, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.188379", "step": 6330, "epoch": 3 }, { "type": "loss", "content": 0.000818633649032563, "timestamp": "2025-09-10 02:32:22.190299", "step": 6331, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.219476", "step": 6331, "epoch": 3 }, { "type": "loss", "content": 7.42963093216531e-05, "timestamp": "2025-09-10 02:32:22.242810", "step": 6332, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:22.271481", "step": 6332, "epoch": 3 }, { "type": "loss", "content": 0.03502601757645607, "timestamp": "2025-09-10 02:32:22.273453", "step": 6333, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.302613", "step": 6333, "epoch": 3 }, { "type": "loss", "content": 0.0007293486269190907, "timestamp": "2025-09-10 02:32:22.304248", "step": 6334, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.332801", "step": 6334, "epoch": 3 }, { "type": "loss", "content": 0.001932331477291882, "timestamp": "2025-09-10 02:32:22.334557", "step": 6335, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.363441", "step": 6335, "epoch": 3 }, { "type": "loss", "content": 0.00112222321331501, "timestamp": "2025-09-10 02:32:22.386748", "step": 6336, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.415178", "step": 6336, "epoch": 3 }, { "type": "loss", "content": 0.014759731478989124, "timestamp": "2025-09-10 02:32:22.417050", "step": 6337, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:22.445997", "step": 6337, "epoch": 3 }, { "type": "loss", "content": 0.0559990331530571, "timestamp": "2025-09-10 02:32:22.447934", "step": 6338, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.476673", "step": 6338, "epoch": 3 }, { "type": "loss", "content": 0.0019543636590242386, "timestamp": "2025-09-10 02:32:22.478321", "step": 6339, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.507602", "step": 6339, "epoch": 3 }, { "type": "loss", "content": 0.0009176231105811894, "timestamp": "2025-09-10 02:32:22.530706", "step": 6340, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.559608", "step": 6340, "epoch": 3 }, { "type": "loss", "content": 0.00015823465946596116, "timestamp": "2025-09-10 02:32:22.561790", "step": 6341, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.590447", "step": 6341, "epoch": 3 }, { "type": "loss", "content": 0.0005021968390792608, "timestamp": "2025-09-10 02:32:22.592155", "step": 6342, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.621321", "step": 6342, "epoch": 3 }, { "type": "loss", "content": 0.0017196570988744497, "timestamp": "2025-09-10 02:32:22.623278", "step": 6343, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:22.652815", "step": 6343, "epoch": 3 }, { "type": "loss", "content": 0.04168880358338356, "timestamp": "2025-09-10 02:32:22.676044", "step": 6344, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.704993", "step": 6344, "epoch": 3 }, { "type": "loss", "content": 0.0009726889547891915, "timestamp": "2025-09-10 02:32:22.706845", "step": 6345, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.736276", "step": 6345, "epoch": 3 }, { "type": "loss", "content": 0.0030025788582861423, "timestamp": "2025-09-10 02:32:22.738119", "step": 6346, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.768199", "step": 6346, "epoch": 3 }, { "type": "loss", "content": 0.001672284910455346, "timestamp": "2025-09-10 02:32:22.769909", "step": 6347, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:22.803449", "step": 6347, "epoch": 3 }, { "type": "loss", "content": 0.0004967619897797704, "timestamp": "2025-09-10 02:32:22.826889", "step": 6348, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.855883", "step": 6348, "epoch": 3 }, { "type": "loss", "content": 0.0004202341369818896, "timestamp": "2025-09-10 02:32:22.857547", "step": 6349, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:22.886531", "step": 6349, "epoch": 3 }, { "type": "loss", "content": 0.006135419011116028, "timestamp": "2025-09-10 02:32:22.888270", "step": 6350, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.917201", "step": 6350, "epoch": 3 }, { "type": "loss", "content": 0.0023682310711592436, "timestamp": "2025-09-10 02:32:22.918790", "step": 6351, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.947449", "step": 6351, "epoch": 3 }, { "type": "loss", "content": 0.0017628510249778628, "timestamp": "2025-09-10 02:32:22.970870", "step": 6352, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:22.999829", "step": 6352, "epoch": 3 }, { "type": "loss", "content": 0.02108711563050747, "timestamp": "2025-09-10 02:32:23.001935", "step": 6353, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.030897", "step": 6353, "epoch": 3 }, { "type": "loss", "content": 0.004597174469381571, "timestamp": "2025-09-10 02:32:23.032690", "step": 6354, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.061529", "step": 6354, "epoch": 3 }, { "type": "loss", "content": 0.001220061327330768, "timestamp": "2025-09-10 02:32:23.063472", "step": 6355, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:23.092605", "step": 6355, "epoch": 3 }, { "type": "loss", "content": 0.0007231011986732483, "timestamp": "2025-09-10 02:32:23.115893", "step": 6356, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.144968", "step": 6356, "epoch": 3 }, { "type": "loss", "content": 0.004517923109233379, "timestamp": "2025-09-10 02:32:23.146881", "step": 6357, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.177243", "step": 6357, "epoch": 3 }, { "type": "loss", "content": 0.0022846742067486048, "timestamp": "2025-09-10 02:32:23.179200", "step": 6358, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.208747", "step": 6358, "epoch": 3 }, { "type": "loss", "content": 0.0007761307060718536, "timestamp": "2025-09-10 02:32:23.210728", "step": 6359, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:23.243331", "step": 6359, "epoch": 3 }, { "type": "loss", "content": 0.005061306990683079, "timestamp": "2025-09-10 02:32:23.266596", "step": 6360, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.296223", "step": 6360, "epoch": 3 }, { "type": "loss", "content": 0.00022909880499355495, "timestamp": "2025-09-10 02:32:23.297906", "step": 6361, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.326536", "step": 6361, "epoch": 3 }, { "type": "loss", "content": 0.010702590458095074, "timestamp": "2025-09-10 02:32:23.328252", "step": 6362, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.359631", "step": 6362, "epoch": 3 }, { "type": "loss", "content": 0.002730281325057149, "timestamp": "2025-09-10 02:32:23.361254", "step": 6363, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:23.392880", "step": 6363, "epoch": 3 }, { "type": "loss", "content": 0.01938006654381752, "timestamp": "2025-09-10 02:32:23.416238", "step": 6364, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.446984", "step": 6364, "epoch": 3 }, { "type": "loss", "content": 0.0030088615603744984, "timestamp": "2025-09-10 02:32:23.448832", "step": 6365, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.483805", "step": 6365, "epoch": 3 }, { "type": "loss", "content": 0.002860612003132701, "timestamp": "2025-09-10 02:32:23.485764", "step": 6366, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.519135", "step": 6366, "epoch": 3 }, { "type": "loss", "content": 0.0010435706935822964, "timestamp": "2025-09-10 02:32:23.520982", "step": 6367, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.553370", "step": 6367, "epoch": 3 }, { "type": "loss", "content": 0.008101065643131733, "timestamp": "2025-09-10 02:32:23.576939", "step": 6368, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.609474", "step": 6368, "epoch": 3 }, { "type": "loss", "content": 0.0003759710234589875, "timestamp": "2025-09-10 02:32:23.611411", "step": 6369, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:23.646064", "step": 6369, "epoch": 3 }, { "type": "loss", "content": 0.0014672511024400592, "timestamp": "2025-09-10 02:32:23.647880", "step": 6370, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.685207", "step": 6370, "epoch": 3 }, { "type": "loss", "content": 0.005713839549571276, "timestamp": "2025-09-10 02:32:23.687210", "step": 6371, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.726817", "step": 6371, "epoch": 3 }, { "type": "loss", "content": 0.0010342712048441172, "timestamp": "2025-09-10 02:32:23.750593", "step": 6372, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.785372", "step": 6372, "epoch": 3 }, { "type": "loss", "content": 0.0010816141730174422, "timestamp": "2025-09-10 02:32:23.787580", "step": 6373, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.821922", "step": 6373, "epoch": 3 }, { "type": "loss", "content": 0.0015200666384771466, "timestamp": "2025-09-10 02:32:23.823629", "step": 6374, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.852503", "step": 6374, "epoch": 3 }, { "type": "loss", "content": 0.003132024547085166, "timestamp": "2025-09-10 02:32:23.854410", "step": 6375, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.883606", "step": 6375, "epoch": 3 }, { "type": "loss", "content": 0.0013345686020329595, "timestamp": "2025-09-10 02:32:23.906930", "step": 6376, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.936352", "step": 6376, "epoch": 3 }, { "type": "loss", "content": 0.003480277955532074, "timestamp": "2025-09-10 02:32:23.938298", "step": 6377, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.967827", "step": 6377, "epoch": 3 }, { "type": "loss", "content": 0.0006901667220517993, "timestamp": "2025-09-10 02:32:23.969765", "step": 6378, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:23.998370", "step": 6378, "epoch": 3 }, { "type": "loss", "content": 0.0003840525168925524, "timestamp": "2025-09-10 02:32:24.000437", "step": 6379, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:24.029704", "step": 6379, "epoch": 3 }, { "type": "loss", "content": 0.003375781001523137, "timestamp": "2025-09-10 02:32:24.052864", "step": 6380, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:24.081911", "step": 6380, "epoch": 3 }, { "type": "loss", "content": 0.001060336478985846, "timestamp": "2025-09-10 02:32:24.083641", "step": 6381, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:24.112372", "step": 6381, "epoch": 3 }, { "type": "loss", "content": 0.00025973698939196765, "timestamp": "2025-09-10 02:32:24.114068", "step": 6382, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:24.142751", "step": 6382, "epoch": 3 }, { "type": "loss", "content": 0.0006213196320459247, "timestamp": "2025-09-10 02:32:24.144467", "step": 6383, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:24.174226", "step": 6383, "epoch": 3 }, { "type": "loss", "content": 0.0005688367527909577, "timestamp": "2025-09-10 02:32:24.197521", "step": 6384, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:32:26.080357", "step": 6384, "epoch": 3 }, { "type": "pplx", "content": 2602046.62760713, "timestamp": "2025-09-10 02:32:26.082134", "step": 6384, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.110040", "step": 6384, "epoch": 3 }, { "type": "loss", "content": 0.0007975572370924056, "timestamp": "2025-09-10 02:32:26.111781", "step": 6385, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.141032", "step": 6385, "epoch": 3 }, { "type": "loss", "content": 0.032970402389764786, "timestamp": "2025-09-10 02:32:26.142887", "step": 6386, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.171430", "step": 6386, "epoch": 3 }, { "type": "loss", "content": 0.00016916354070417583, "timestamp": "2025-09-10 02:32:26.173215", "step": 6387, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.202098", "step": 6387, "epoch": 3 }, { "type": "loss", "content": 0.0001721300941426307, "timestamp": "2025-09-10 02:32:26.225762", "step": 6388, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.254988", "step": 6388, "epoch": 3 }, { "type": "loss", "content": 0.00022461578191723675, "timestamp": "2025-09-10 02:32:26.256926", "step": 6389, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.286080", "step": 6389, "epoch": 3 }, { "type": "loss", "content": 0.0004514531174208969, "timestamp": "2025-09-10 02:32:26.291189", "step": 6390, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.322315", "step": 6390, "epoch": 3 }, { "type": "loss", "content": 0.0002696145966183394, "timestamp": "2025-09-10 02:32:26.324143", "step": 6391, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.353218", "step": 6391, "epoch": 3 }, { "type": "loss", "content": 0.002055809134617448, "timestamp": "2025-09-10 02:32:26.376654", "step": 6392, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.407933", "step": 6392, "epoch": 3 }, { "type": "loss", "content": 0.012479028664529324, "timestamp": "2025-09-10 02:32:26.409645", "step": 6393, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.438377", "step": 6393, "epoch": 3 }, { "type": "loss", "content": 0.0008340683998540044, "timestamp": "2025-09-10 02:32:26.440065", "step": 6394, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:26.469193", "step": 6394, "epoch": 3 }, { "type": "loss", "content": 0.0007035091402940452, "timestamp": "2025-09-10 02:32:26.479478", "step": 6395, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.510827", "step": 6395, "epoch": 3 }, { "type": "loss", "content": 0.004726804792881012, "timestamp": "2025-09-10 02:32:26.534378", "step": 6396, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.564634", "step": 6396, "epoch": 3 }, { "type": "loss", "content": 0.00065030058613047, "timestamp": "2025-09-10 02:32:26.567185", "step": 6397, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.599118", "step": 6397, "epoch": 3 }, { "type": "loss", "content": 0.0008404856198467314, "timestamp": "2025-09-10 02:32:26.600716", "step": 6398, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.632272", "step": 6398, "epoch": 3 }, { "type": "loss", "content": 0.024703437462449074, "timestamp": "2025-09-10 02:32:26.634083", "step": 6399, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.662807", "step": 6399, "epoch": 3 }, { "type": "loss", "content": 0.027414944022893906, "timestamp": "2025-09-10 02:32:26.686458", "step": 6400, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:26.715363", "step": 6400, "epoch": 3 }, { "type": "loss", "content": 0.0026188211049884558, "timestamp": "2025-09-10 02:32:26.717884", "step": 6401, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.747095", "step": 6401, "epoch": 3 }, { "type": "loss", "content": 0.01582406833767891, "timestamp": "2025-09-10 02:32:26.751325", "step": 6402, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.787459", "step": 6402, "epoch": 3 }, { "type": "loss", "content": 0.0018050411017611623, "timestamp": "2025-09-10 02:32:26.789057", "step": 6403, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:26.827290", "step": 6403, "epoch": 3 }, { "type": "loss", "content": 0.00014450155140366405, "timestamp": "2025-09-10 02:32:26.854654", "step": 6404, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.886558", "step": 6404, "epoch": 3 }, { "type": "loss", "content": 0.0009893247624859214, "timestamp": "2025-09-10 02:32:26.888367", "step": 6405, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.917573", "step": 6405, "epoch": 3 }, { "type": "loss", "content": 0.05074020102620125, "timestamp": "2025-09-10 02:32:26.919349", "step": 6406, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.948263", "step": 6406, "epoch": 3 }, { "type": "loss", "content": 0.0004242721770424396, "timestamp": "2025-09-10 02:32:26.950250", "step": 6407, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:26.981601", "step": 6407, "epoch": 3 }, { "type": "loss", "content": 0.0032885505352169275, "timestamp": "2025-09-10 02:32:27.010126", "step": 6408, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.039001", "step": 6408, "epoch": 3 }, { "type": "loss", "content": 0.00021194576402194798, "timestamp": "2025-09-10 02:32:27.040915", "step": 6409, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.069641", "step": 6409, "epoch": 3 }, { "type": "loss", "content": 0.00020467361900955439, "timestamp": "2025-09-10 02:32:27.073547", "step": 6410, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.103346", "step": 6410, "epoch": 3 }, { "type": "loss", "content": 0.0005671937251463532, "timestamp": "2025-09-10 02:32:27.105326", "step": 6411, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:27.136431", "step": 6411, "epoch": 3 }, { "type": "loss", "content": 0.00018863029254134744, "timestamp": "2025-09-10 02:32:27.159798", "step": 6412, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:27.189605", "step": 6412, "epoch": 3 }, { "type": "loss", "content": 0.001088503166101873, "timestamp": "2025-09-10 02:32:27.191484", "step": 6413, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.222539", "step": 6413, "epoch": 3 }, { "type": "loss", "content": 0.022781027480959892, "timestamp": "2025-09-10 02:32:27.224260", "step": 6414, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.256822", "step": 6414, "epoch": 3 }, { "type": "loss", "content": 0.00010405414650449529, "timestamp": "2025-09-10 02:32:27.258526", "step": 6415, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.287476", "step": 6415, "epoch": 3 }, { "type": "loss", "content": 0.012186779640614986, "timestamp": "2025-09-10 02:32:27.310809", "step": 6416, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.342880", "step": 6416, "epoch": 3 }, { "type": "loss", "content": 0.0024998204316943884, "timestamp": "2025-09-10 02:32:27.344656", "step": 6417, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.374541", "step": 6417, "epoch": 3 }, { "type": "loss", "content": 0.0008467523148283362, "timestamp": "2025-09-10 02:32:27.376279", "step": 6418, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.411390", "step": 6418, "epoch": 3 }, { "type": "loss", "content": 0.015074108727276325, "timestamp": "2025-09-10 02:32:27.413530", "step": 6419, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.447029", "step": 6419, "epoch": 3 }, { "type": "loss", "content": 0.0007348424405790865, "timestamp": "2025-09-10 02:32:27.470643", "step": 6420, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.506370", "step": 6420, "epoch": 3 }, { "type": "loss", "content": 0.00017972067871596664, "timestamp": "2025-09-10 02:32:27.508356", "step": 6421, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:27.539217", "step": 6421, "epoch": 3 }, { "type": "loss", "content": 0.02017919160425663, "timestamp": "2025-09-10 02:32:27.540995", "step": 6422, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.575634", "step": 6422, "epoch": 3 }, { "type": "loss", "content": 0.002349887741729617, "timestamp": "2025-09-10 02:32:27.577425", "step": 6423, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.609793", "step": 6423, "epoch": 3 }, { "type": "loss", "content": 0.01225587073713541, "timestamp": "2025-09-10 02:32:27.633422", "step": 6424, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.664460", "step": 6424, "epoch": 3 }, { "type": "loss", "content": 0.00065768783679232, "timestamp": "2025-09-10 02:32:27.666521", "step": 6425, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:27.704778", "step": 6425, "epoch": 3 }, { "type": "loss", "content": 0.0003179568739142269, "timestamp": "2025-09-10 02:32:27.706474", "step": 6426, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.744842", "step": 6426, "epoch": 3 }, { "type": "loss", "content": 0.0015685457037761807, "timestamp": "2025-09-10 02:32:27.746561", "step": 6427, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.780899", "step": 6427, "epoch": 3 }, { "type": "loss", "content": 0.0005152070079930127, "timestamp": "2025-09-10 02:32:27.804217", "step": 6428, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:27.832914", "step": 6428, "epoch": 3 }, { "type": "loss", "content": 0.0001408977113896981, "timestamp": "2025-09-10 02:32:27.834813", "step": 6429, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.863907", "step": 6429, "epoch": 3 }, { "type": "loss", "content": 0.002704629208892584, "timestamp": "2025-09-10 02:32:27.865720", "step": 6430, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.894410", "step": 6430, "epoch": 3 }, { "type": "loss", "content": 0.00012314648483879864, "timestamp": "2025-09-10 02:32:27.896345", "step": 6431, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:27.925444", "step": 6431, "epoch": 3 }, { "type": "loss", "content": 0.00018916084081865847, "timestamp": "2025-09-10 02:32:27.949052", "step": 6432, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:27.978103", "step": 6432, "epoch": 3 }, { "type": "loss", "content": 0.0003389800258446485, "timestamp": "2025-09-10 02:32:27.979804", "step": 6433, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.008486", "step": 6433, "epoch": 3 }, { "type": "loss", "content": 0.0007000124314799905, "timestamp": "2025-09-10 02:32:28.010596", "step": 6434, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.041613", "step": 6434, "epoch": 3 }, { "type": "loss", "content": 0.0007533311145380139, "timestamp": "2025-09-10 02:32:28.043374", "step": 6435, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 144 ], "flops": 4271696270016 }, "timestamp": "2025-09-10 02:32:28.072405", "step": 6435, "epoch": 3 }, { "type": "loss", "content": 0.0007968792924657464, "timestamp": "2025-09-10 02:32:28.095919", "step": 6436, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:28.124707", "step": 6436, "epoch": 3 }, { "type": "loss", "content": 0.024879014119505882, "timestamp": "2025-09-10 02:32:28.126523", "step": 6437, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.155148", "step": 6437, "epoch": 3 }, { "type": "loss", "content": 0.00015964091289788485, "timestamp": "2025-09-10 02:32:28.157015", "step": 6438, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.185887", "step": 6438, "epoch": 3 }, { "type": "loss", "content": 0.004214493092149496, "timestamp": "2025-09-10 02:32:28.187736", "step": 6439, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.216726", "step": 6439, "epoch": 3 }, { "type": "loss", "content": 0.0002616394485812634, "timestamp": "2025-09-10 02:32:28.239809", "step": 6440, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.269085", "step": 6440, "epoch": 3 }, { "type": "loss", "content": 0.0021432521753013134, "timestamp": "2025-09-10 02:32:28.271027", "step": 6441, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.300758", "step": 6441, "epoch": 3 }, { "type": "loss", "content": 0.0003029382205568254, "timestamp": "2025-09-10 02:32:28.302630", "step": 6442, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.331086", "step": 6442, "epoch": 3 }, { "type": "loss", "content": 0.009857903234660625, "timestamp": "2025-09-10 02:32:28.333089", "step": 6443, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.362184", "step": 6443, "epoch": 3 }, { "type": "loss", "content": 0.0071450648829340935, "timestamp": "2025-09-10 02:32:28.385443", "step": 6444, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:28.415428", "step": 6444, "epoch": 3 }, { "type": "loss", "content": 0.046203065663576126, "timestamp": "2025-09-10 02:32:28.417207", "step": 6445, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:28.446450", "step": 6445, "epoch": 3 }, { "type": "loss", "content": 0.00039324097451753914, "timestamp": "2025-09-10 02:32:28.448320", "step": 6446, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.477751", "step": 6446, "epoch": 3 }, { "type": "loss", "content": 0.0001980369124794379, "timestamp": "2025-09-10 02:32:28.479467", "step": 6447, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.508459", "step": 6447, "epoch": 3 }, { "type": "loss", "content": 0.00044665136374533176, "timestamp": "2025-09-10 02:32:28.531909", "step": 6448, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.561059", "step": 6448, "epoch": 3 }, { "type": "loss", "content": 0.00021955862757749856, "timestamp": "2025-09-10 02:32:28.563011", "step": 6449, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:28.592061", "step": 6449, "epoch": 3 }, { "type": "loss", "content": 0.028430433943867683, "timestamp": "2025-09-10 02:32:28.593655", "step": 6450, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:28.622758", "step": 6450, "epoch": 3 }, { "type": "loss", "content": 0.0043237460777163506, "timestamp": "2025-09-10 02:32:28.624667", "step": 6451, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.654233", "step": 6451, "epoch": 3 }, { "type": "loss", "content": 0.0006920182495377958, "timestamp": "2025-09-10 02:32:28.677676", "step": 6452, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.707426", "step": 6452, "epoch": 3 }, { "type": "loss", "content": 0.0002523947914596647, "timestamp": "2025-09-10 02:32:28.709393", "step": 6453, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.739242", "step": 6453, "epoch": 3 }, { "type": "loss", "content": 0.004871721845120192, "timestamp": "2025-09-10 02:32:28.741467", "step": 6454, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.772034", "step": 6454, "epoch": 3 }, { "type": "loss", "content": 0.00031806857441551983, "timestamp": "2025-09-10 02:32:28.773788", "step": 6455, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.807393", "step": 6455, "epoch": 3 }, { "type": "loss", "content": 0.00031787189072929323, "timestamp": "2025-09-10 02:32:28.830702", "step": 6456, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.859662", "step": 6456, "epoch": 3 }, { "type": "loss", "content": 0.0002223090996267274, "timestamp": "2025-09-10 02:32:28.861417", "step": 6457, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:28.890544", "step": 6457, "epoch": 3 }, { "type": "loss", "content": 0.01526606921106577, "timestamp": "2025-09-10 02:32:28.892390", "step": 6458, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:28.920890", "step": 6458, "epoch": 3 }, { "type": "loss", "content": 0.00029888629796914756, "timestamp": "2025-09-10 02:32:28.922848", "step": 6459, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:28.951455", "step": 6459, "epoch": 3 }, { "type": "loss", "content": 0.00015020419959910214, "timestamp": "2025-09-10 02:32:28.974975", "step": 6460, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.003695", "step": 6460, "epoch": 3 }, { "type": "loss", "content": 0.000799665111117065, "timestamp": "2025-09-10 02:32:29.005540", "step": 6461, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.034179", "step": 6461, "epoch": 3 }, { "type": "loss", "content": 0.019971687346696854, "timestamp": "2025-09-10 02:32:29.035987", "step": 6462, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.065180", "step": 6462, "epoch": 3 }, { "type": "loss", "content": 0.0021395846270024776, "timestamp": "2025-09-10 02:32:29.067000", "step": 6463, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.095920", "step": 6463, "epoch": 3 }, { "type": "loss", "content": 0.005740799009799957, "timestamp": "2025-09-10 02:32:29.119107", "step": 6464, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.149274", "step": 6464, "epoch": 3 }, { "type": "loss", "content": 0.0009019740973599255, "timestamp": "2025-09-10 02:32:29.151097", "step": 6465, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.181349", "step": 6465, "epoch": 3 }, { "type": "loss", "content": 0.0002621648891363293, "timestamp": "2025-09-10 02:32:29.182790", "step": 6466, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:29.216922", "step": 6466, "epoch": 3 }, { "type": "loss", "content": 0.001667393953539431, "timestamp": "2025-09-10 02:32:29.218598", "step": 6467, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.254355", "step": 6467, "epoch": 3 }, { "type": "loss", "content": 0.00014791438297834247, "timestamp": "2025-09-10 02:32:29.277772", "step": 6468, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.308880", "step": 6468, "epoch": 3 }, { "type": "loss", "content": 0.00028856098651885986, "timestamp": "2025-09-10 02:32:29.310965", "step": 6469, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.342171", "step": 6469, "epoch": 3 }, { "type": "loss", "content": 0.0004741291340906173, "timestamp": "2025-09-10 02:32:29.343911", "step": 6470, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:29.374174", "step": 6470, "epoch": 3 }, { "type": "loss", "content": 0.00037020345916971564, "timestamp": "2025-09-10 02:32:29.375910", "step": 6471, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.409523", "step": 6471, "epoch": 3 }, { "type": "loss", "content": 0.0003086226643063128, "timestamp": "2025-09-10 02:32:29.432799", "step": 6472, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:29.464501", "step": 6472, "epoch": 3 }, { "type": "loss", "content": 0.00035341139300726354, "timestamp": "2025-09-10 02:32:29.466338", "step": 6473, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.503123", "step": 6473, "epoch": 3 }, { "type": "loss", "content": 0.0007538163335993886, "timestamp": "2025-09-10 02:32:29.505038", "step": 6474, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.535596", "step": 6474, "epoch": 3 }, { "type": "loss", "content": 0.00029482340323738754, "timestamp": "2025-09-10 02:32:29.537350", "step": 6475, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.570351", "step": 6475, "epoch": 3 }, { "type": "loss", "content": 0.00020520342513918877, "timestamp": "2025-09-10 02:32:29.593609", "step": 6476, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.624945", "step": 6476, "epoch": 3 }, { "type": "loss", "content": 0.0007698552799411118, "timestamp": "2025-09-10 02:32:29.626829", "step": 6477, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.660422", "step": 6477, "epoch": 3 }, { "type": "loss", "content": 0.00027317609055899084, "timestamp": "2025-09-10 02:32:29.662133", "step": 6478, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.699765", "step": 6478, "epoch": 3 }, { "type": "loss", "content": 0.00039539759745821357, "timestamp": "2025-09-10 02:32:29.701609", "step": 6479, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.738272", "step": 6479, "epoch": 3 }, { "type": "loss", "content": 0.0001573774206917733, "timestamp": "2025-09-10 02:32:29.761794", "step": 6480, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.798776", "step": 6480, "epoch": 3 }, { "type": "loss", "content": 0.0009613978327251971, "timestamp": "2025-09-10 02:32:29.800680", "step": 6481, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:29.829808", "step": 6481, "epoch": 3 }, { "type": "loss", "content": 0.00011410355364205316, "timestamp": "2025-09-10 02:32:29.831698", "step": 6482, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.861112", "step": 6482, "epoch": 3 }, { "type": "loss", "content": 0.009019218385219574, "timestamp": "2025-09-10 02:32:29.863074", "step": 6483, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:29.892711", "step": 6483, "epoch": 3 }, { "type": "loss", "content": 0.0010529905557632446, "timestamp": "2025-09-10 02:32:29.916201", "step": 6484, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.945707", "step": 6484, "epoch": 3 }, { "type": "loss", "content": 0.0009112291736528277, "timestamp": "2025-09-10 02:32:29.947684", "step": 6485, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:29.976565", "step": 6485, "epoch": 3 }, { "type": "loss", "content": 0.0009821915300562978, "timestamp": "2025-09-10 02:32:29.978294", "step": 6486, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.007599", "step": 6486, "epoch": 3 }, { "type": "loss", "content": 0.0005452854675240815, "timestamp": "2025-09-10 02:32:30.009395", "step": 6487, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.038441", "step": 6487, "epoch": 3 }, { "type": "loss", "content": 0.0002243387425551191, "timestamp": "2025-09-10 02:32:30.061890", "step": 6488, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.091022", "step": 6488, "epoch": 3 }, { "type": "loss", "content": 0.004632903728634119, "timestamp": "2025-09-10 02:32:30.092822", "step": 6489, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.121640", "step": 6489, "epoch": 3 }, { "type": "loss", "content": 0.0002741267380770296, "timestamp": "2025-09-10 02:32:30.123853", "step": 6490, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.153171", "step": 6490, "epoch": 3 }, { "type": "loss", "content": 0.002974990289658308, "timestamp": "2025-09-10 02:32:30.155007", "step": 6491, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.183634", "step": 6491, "epoch": 3 }, { "type": "loss", "content": 0.0008022545953281224, "timestamp": "2025-09-10 02:32:30.207133", "step": 6492, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.236240", "step": 6492, "epoch": 3 }, { "type": "loss", "content": 0.001183294109068811, "timestamp": "2025-09-10 02:32:30.238045", "step": 6493, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.267032", "step": 6493, "epoch": 3 }, { "type": "loss", "content": 0.00015620666090399027, "timestamp": "2025-09-10 02:32:30.268715", "step": 6494, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:30.297612", "step": 6494, "epoch": 3 }, { "type": "loss", "content": 0.001702985493466258, "timestamp": "2025-09-10 02:32:30.299391", "step": 6495, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.328773", "step": 6495, "epoch": 3 }, { "type": "loss", "content": 0.009991460479795933, "timestamp": "2025-09-10 02:32:30.353644", "step": 6496, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.382997", "step": 6496, "epoch": 3 }, { "type": "loss", "content": 0.0010712883668020368, "timestamp": "2025-09-10 02:32:30.384842", "step": 6497, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.413409", "step": 6497, "epoch": 3 }, { "type": "loss", "content": 0.0003185864188708365, "timestamp": "2025-09-10 02:32:30.415178", "step": 6498, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:30.444350", "step": 6498, "epoch": 3 }, { "type": "loss", "content": 0.03873638063669205, "timestamp": "2025-09-10 02:32:30.446077", "step": 6499, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:30.474911", "step": 6499, "epoch": 3 }, { "type": "loss", "content": 0.01748695597052574, "timestamp": "2025-09-10 02:32:30.498343", "step": 6500, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 6500", "timestamp": "2025-09-10 02:32:35.848016", "step": 6500, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:35.882307", "step": 6500, "epoch": 3 }, { "type": "loss", "content": 0.0004379930905997753, "timestamp": "2025-09-10 02:32:35.884426", "step": 6501, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:35.914425", "step": 6501, "epoch": 3 }, { "type": "loss", "content": 0.00020205129112582654, "timestamp": "2025-09-10 02:32:35.916494", "step": 6502, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:35.946040", "step": 6502, "epoch": 3 }, { "type": "loss", "content": 0.00021745593403466046, "timestamp": "2025-09-10 02:32:35.948076", "step": 6503, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:35.977988", "step": 6503, "epoch": 3 }, { "type": "loss", "content": 0.054765503853559494, "timestamp": "2025-09-10 02:32:36.001826", "step": 6504, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.032374", "step": 6504, "epoch": 3 }, { "type": "loss", "content": 0.0003472270618658513, "timestamp": "2025-09-10 02:32:36.035343", "step": 6505, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.065999", "step": 6505, "epoch": 3 }, { "type": "loss", "content": 0.0031078639440238476, "timestamp": "2025-09-10 02:32:36.067831", "step": 6506, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:36.102429", "step": 6506, "epoch": 3 }, { "type": "loss", "content": 0.00019415069255046546, "timestamp": "2025-09-10 02:32:36.104078", "step": 6507, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.132983", "step": 6507, "epoch": 3 }, { "type": "loss", "content": 0.00020654825493693352, "timestamp": "2025-09-10 02:32:36.156577", "step": 6508, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.185567", "step": 6508, "epoch": 3 }, { "type": "loss", "content": 0.0001644888980081305, "timestamp": "2025-09-10 02:32:36.187513", "step": 6509, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.216402", "step": 6509, "epoch": 3 }, { "type": "loss", "content": 0.00035848282277584076, "timestamp": "2025-09-10 02:32:36.218956", "step": 6510, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.248694", "step": 6510, "epoch": 3 }, { "type": "loss", "content": 0.0015532153192907572, "timestamp": "2025-09-10 02:32:36.250663", "step": 6511, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.279957", "step": 6511, "epoch": 3 }, { "type": "loss", "content": 0.00032827811082825065, "timestamp": "2025-09-10 02:32:36.304049", "step": 6512, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.338393", "step": 6512, "epoch": 3 }, { "type": "loss", "content": 0.0003724382841028273, "timestamp": "2025-09-10 02:32:36.340184", "step": 6513, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.369704", "step": 6513, "epoch": 3 }, { "type": "loss", "content": 0.000916356104426086, "timestamp": "2025-09-10 02:32:36.371431", "step": 6514, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.400696", "step": 6514, "epoch": 3 }, { "type": "loss", "content": 0.00069367018295452, "timestamp": "2025-09-10 02:32:36.403946", "step": 6515, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.434251", "step": 6515, "epoch": 3 }, { "type": "loss", "content": 0.00020402041263878345, "timestamp": "2025-09-10 02:32:36.457611", "step": 6516, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.486949", "step": 6516, "epoch": 3 }, { "type": "loss", "content": 0.00026494014309719205, "timestamp": "2025-09-10 02:32:36.488841", "step": 6517, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:36.518078", "step": 6517, "epoch": 3 }, { "type": "loss", "content": 0.0015278218779712915, "timestamp": "2025-09-10 02:32:36.519925", "step": 6518, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.548784", "step": 6518, "epoch": 3 }, { "type": "loss", "content": 0.00026909203734248877, "timestamp": "2025-09-10 02:32:36.550848", "step": 6519, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.580220", "step": 6519, "epoch": 3 }, { "type": "loss", "content": 0.0050692250952124596, "timestamp": "2025-09-10 02:32:36.603916", "step": 6520, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.633367", "step": 6520, "epoch": 3 }, { "type": "loss", "content": 0.008903230540454388, "timestamp": "2025-09-10 02:32:36.635194", "step": 6521, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.664694", "step": 6521, "epoch": 3 }, { "type": "loss", "content": 0.005301364231854677, "timestamp": "2025-09-10 02:32:36.666424", "step": 6522, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.696205", "step": 6522, "epoch": 3 }, { "type": "loss", "content": 0.001449521048925817, "timestamp": "2025-09-10 02:32:36.698179", "step": 6523, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.728243", "step": 6523, "epoch": 3 }, { "type": "loss", "content": 0.0001892952568596229, "timestamp": "2025-09-10 02:32:36.751747", "step": 6524, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.784415", "step": 6524, "epoch": 3 }, { "type": "loss", "content": 0.000138781892019324, "timestamp": "2025-09-10 02:32:36.786441", "step": 6525, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:36.820062", "step": 6525, "epoch": 3 }, { "type": "loss", "content": 0.0004280885332264006, "timestamp": "2025-09-10 02:32:36.822254", "step": 6526, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.851819", "step": 6526, "epoch": 3 }, { "type": "loss", "content": 0.00021600235777441412, "timestamp": "2025-09-10 02:32:36.853822", "step": 6527, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.882890", "step": 6527, "epoch": 3 }, { "type": "loss", "content": 0.00652282265946269, "timestamp": "2025-09-10 02:32:36.906555", "step": 6528, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.935891", "step": 6528, "epoch": 3 }, { "type": "loss", "content": 0.0005161373992450535, "timestamp": "2025-09-10 02:32:36.937785", "step": 6529, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:36.966574", "step": 6529, "epoch": 3 }, { "type": "loss", "content": 0.0001188876703963615, "timestamp": "2025-09-10 02:32:36.968460", "step": 6530, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:36.997903", "step": 6530, "epoch": 3 }, { "type": "loss", "content": 0.0005654821288771927, "timestamp": "2025-09-10 02:32:36.999944", "step": 6531, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:37.028839", "step": 6531, "epoch": 3 }, { "type": "loss", "content": 0.0016985258553177118, "timestamp": "2025-09-10 02:32:37.052588", "step": 6532, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:37.082164", "step": 6532, "epoch": 3 }, { "type": "loss", "content": 0.05521092563867569, "timestamp": "2025-09-10 02:32:37.083835", "step": 6533, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:37.112744", "step": 6533, "epoch": 3 }, { "type": "loss", "content": 0.0023533659987151623, "timestamp": "2025-09-10 02:32:37.114501", "step": 6534, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:37.143504", "step": 6534, "epoch": 3 }, { "type": "loss", "content": 0.00022290900233201683, "timestamp": "2025-09-10 02:32:37.145571", "step": 6535, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:37.176165", "step": 6535, "epoch": 3 }, { "type": "loss", "content": 0.00036111968802288175, "timestamp": "2025-09-10 02:32:37.199493", "step": 6536, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:32:39.104896", "step": 6536, "epoch": 3 }, { "type": "pplx", "content": 2442277.5126239713, "timestamp": "2025-09-10 02:32:39.106861", "step": 6536, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.135396", "step": 6536, "epoch": 3 }, { "type": "loss", "content": 9.491186210652813e-05, "timestamp": "2025-09-10 02:32:39.137280", "step": 6537, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:39.166639", "step": 6537, "epoch": 3 }, { "type": "loss", "content": 0.002770493272691965, "timestamp": "2025-09-10 02:32:39.168821", "step": 6538, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.197696", "step": 6538, "epoch": 3 }, { "type": "loss", "content": 9.687560668680817e-05, "timestamp": "2025-09-10 02:32:39.199608", "step": 6539, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:39.228426", "step": 6539, "epoch": 3 }, { "type": "loss", "content": 0.00010829438542714342, "timestamp": "2025-09-10 02:32:39.251928", "step": 6540, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:39.281801", "step": 6540, "epoch": 3 }, { "type": "loss", "content": 9.18650912353769e-05, "timestamp": "2025-09-10 02:32:39.283585", "step": 6541, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.313806", "step": 6541, "epoch": 3 }, { "type": "loss", "content": 0.021688321605324745, "timestamp": "2025-09-10 02:32:39.315828", "step": 6542, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.348751", "step": 6542, "epoch": 3 }, { "type": "loss", "content": 0.00018935799016617239, "timestamp": "2025-09-10 02:32:39.350479", "step": 6543, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.382829", "step": 6543, "epoch": 3 }, { "type": "loss", "content": 0.001111075864173472, "timestamp": "2025-09-10 02:32:39.406216", "step": 6544, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:39.440121", "step": 6544, "epoch": 3 }, { "type": "loss", "content": 0.0008615261758677661, "timestamp": "2025-09-10 02:32:39.441900", "step": 6545, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.475702", "step": 6545, "epoch": 3 }, { "type": "loss", "content": 0.0017964442959055305, "timestamp": "2025-09-10 02:32:39.477680", "step": 6546, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:39.511839", "step": 6546, "epoch": 3 }, { "type": "loss", "content": 0.00037840628647245467, "timestamp": "2025-09-10 02:32:39.514089", "step": 6547, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.545205", "step": 6547, "epoch": 3 }, { "type": "loss", "content": 0.0003863736055791378, "timestamp": "2025-09-10 02:32:39.568621", "step": 6548, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.601634", "step": 6548, "epoch": 3 }, { "type": "loss", "content": 0.00015133467968553305, "timestamp": "2025-09-10 02:32:39.603773", "step": 6549, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.637071", "step": 6549, "epoch": 3 }, { "type": "loss", "content": 0.0020984576549381018, "timestamp": "2025-09-10 02:32:39.639094", "step": 6550, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.671372", "step": 6550, "epoch": 3 }, { "type": "loss", "content": 9.021971345646307e-05, "timestamp": "2025-09-10 02:32:39.673127", "step": 6551, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:39.710146", "step": 6551, "epoch": 3 }, { "type": "loss", "content": 0.0017723581986501813, "timestamp": "2025-09-10 02:32:39.733782", "step": 6552, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.768078", "step": 6552, "epoch": 3 }, { "type": "loss", "content": 0.019412502646446228, "timestamp": "2025-09-10 02:32:39.769980", "step": 6553, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:39.808623", "step": 6553, "epoch": 3 }, { "type": "loss", "content": 0.00012914600665681064, "timestamp": "2025-09-10 02:32:39.810616", "step": 6554, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.840018", "step": 6554, "epoch": 3 }, { "type": "loss", "content": 0.024290457367897034, "timestamp": "2025-09-10 02:32:39.841957", "step": 6555, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.870807", "step": 6555, "epoch": 3 }, { "type": "loss", "content": 0.0001335710840066895, "timestamp": "2025-09-10 02:32:39.894147", "step": 6556, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.923489", "step": 6556, "epoch": 3 }, { "type": "loss", "content": 0.0010500989155843854, "timestamp": "2025-09-10 02:32:39.925440", "step": 6557, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.954724", "step": 6557, "epoch": 3 }, { "type": "loss", "content": 0.00023776150192134082, "timestamp": "2025-09-10 02:32:39.956591", "step": 6558, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:39.985615", "step": 6558, "epoch": 3 }, { "type": "loss", "content": 0.002251929370686412, "timestamp": "2025-09-10 02:32:39.987556", "step": 6559, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.016617", "step": 6559, "epoch": 3 }, { "type": "loss", "content": 0.005498126614838839, "timestamp": "2025-09-10 02:32:40.039894", "step": 6560, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:40.069334", "step": 6560, "epoch": 3 }, { "type": "loss", "content": 0.00023501995019614697, "timestamp": "2025-09-10 02:32:40.071051", "step": 6561, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:40.099980", "step": 6561, "epoch": 3 }, { "type": "loss", "content": 0.02420176938176155, "timestamp": "2025-09-10 02:32:40.101928", "step": 6562, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.131025", "step": 6562, "epoch": 3 }, { "type": "loss", "content": 0.00041470900760032237, "timestamp": "2025-09-10 02:32:40.133015", "step": 6563, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.162061", "step": 6563, "epoch": 3 }, { "type": "loss", "content": 0.04961610585451126, "timestamp": "2025-09-10 02:32:40.185165", "step": 6564, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:40.214342", "step": 6564, "epoch": 3 }, { "type": "loss", "content": 0.0017416634364053607, "timestamp": "2025-09-10 02:32:40.216253", "step": 6565, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.245846", "step": 6565, "epoch": 3 }, { "type": "loss", "content": 0.0006035008700564504, "timestamp": "2025-09-10 02:32:40.247768", "step": 6566, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.276591", "step": 6566, "epoch": 3 }, { "type": "loss", "content": 0.0019087980035692453, "timestamp": "2025-09-10 02:32:40.278495", "step": 6567, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:40.307859", "step": 6567, "epoch": 3 }, { "type": "loss", "content": 0.00014120301057118922, "timestamp": "2025-09-10 02:32:40.331062", "step": 6568, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:40.360052", "step": 6568, "epoch": 3 }, { "type": "loss", "content": 0.00040765447192825377, "timestamp": "2025-09-10 02:32:40.361603", "step": 6569, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.390390", "step": 6569, "epoch": 3 }, { "type": "loss", "content": 0.014113292098045349, "timestamp": "2025-09-10 02:32:40.392163", "step": 6570, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.421926", "step": 6570, "epoch": 3 }, { "type": "loss", "content": 0.0002716171438805759, "timestamp": "2025-09-10 02:32:40.423906", "step": 6571, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.453222", "step": 6571, "epoch": 3 }, { "type": "loss", "content": 0.002028391696512699, "timestamp": "2025-09-10 02:32:40.476530", "step": 6572, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.506084", "step": 6572, "epoch": 3 }, { "type": "loss", "content": 0.0002613048709463328, "timestamp": "2025-09-10 02:32:40.508009", "step": 6573, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.537230", "step": 6573, "epoch": 3 }, { "type": "loss", "content": 0.00010381250467617065, "timestamp": "2025-09-10 02:32:40.539128", "step": 6574, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.568099", "step": 6574, "epoch": 3 }, { "type": "loss", "content": 0.01973879523575306, "timestamp": "2025-09-10 02:32:40.569690", "step": 6575, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.598471", "step": 6575, "epoch": 3 }, { "type": "loss", "content": 0.00020547016174532473, "timestamp": "2025-09-10 02:32:40.622083", "step": 6576, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.651283", "step": 6576, "epoch": 3 }, { "type": "loss", "content": 0.004909324925392866, "timestamp": "2025-09-10 02:32:40.653336", "step": 6577, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.682739", "step": 6577, "epoch": 3 }, { "type": "loss", "content": 0.00044479951611720026, "timestamp": "2025-09-10 02:32:40.684392", "step": 6578, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:40.712954", "step": 6578, "epoch": 3 }, { "type": "loss", "content": 0.00040925919893197715, "timestamp": "2025-09-10 02:32:40.714889", "step": 6579, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.744283", "step": 6579, "epoch": 3 }, { "type": "loss", "content": 7.322274177568033e-05, "timestamp": "2025-09-10 02:32:40.767515", "step": 6580, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.801556", "step": 6580, "epoch": 3 }, { "type": "loss", "content": 0.00012532089021988213, "timestamp": "2025-09-10 02:32:40.803421", "step": 6581, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:40.832604", "step": 6581, "epoch": 3 }, { "type": "loss", "content": 0.0003361101262271404, "timestamp": "2025-09-10 02:32:40.834532", "step": 6582, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:40.863787", "step": 6582, "epoch": 3 }, { "type": "loss", "content": 0.05063135549426079, "timestamp": "2025-09-10 02:32:40.865402", "step": 6583, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:40.895272", "step": 6583, "epoch": 3 }, { "type": "loss", "content": 0.0002015512145590037, "timestamp": "2025-09-10 02:32:40.918681", "step": 6584, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:40.947702", "step": 6584, "epoch": 3 }, { "type": "loss", "content": 0.0013774631079286337, "timestamp": "2025-09-10 02:32:40.949864", "step": 6585, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:40.978564", "step": 6585, "epoch": 3 }, { "type": "loss", "content": 0.0018797398079186678, "timestamp": "2025-09-10 02:32:40.980364", "step": 6586, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.009098", "step": 6586, "epoch": 3 }, { "type": "loss", "content": 0.0001342600298812613, "timestamp": "2025-09-10 02:32:41.010843", "step": 6587, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.039672", "step": 6587, "epoch": 3 }, { "type": "loss", "content": 0.00042086574831046164, "timestamp": "2025-09-10 02:32:41.062935", "step": 6588, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:41.091727", "step": 6588, "epoch": 3 }, { "type": "loss", "content": 0.0003665420808829367, "timestamp": "2025-09-10 02:32:41.093733", "step": 6589, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.123185", "step": 6589, "epoch": 3 }, { "type": "loss", "content": 0.00016097302432172, "timestamp": "2025-09-10 02:32:41.125172", "step": 6590, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.154086", "step": 6590, "epoch": 3 }, { "type": "loss", "content": 0.0009196820901706815, "timestamp": "2025-09-10 02:32:41.156246", "step": 6591, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.186820", "step": 6591, "epoch": 3 }, { "type": "loss", "content": 0.02848992869257927, "timestamp": "2025-09-10 02:32:41.210264", "step": 6592, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.243348", "step": 6592, "epoch": 3 }, { "type": "loss", "content": 0.00047538583748973906, "timestamp": "2025-09-10 02:32:41.245298", "step": 6593, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:41.277785", "step": 6593, "epoch": 3 }, { "type": "loss", "content": 0.0005702796042896807, "timestamp": "2025-09-10 02:32:41.279570", "step": 6594, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.311696", "step": 6594, "epoch": 3 }, { "type": "loss", "content": 0.0009750212775543332, "timestamp": "2025-09-10 02:32:41.313599", "step": 6595, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.346892", "step": 6595, "epoch": 3 }, { "type": "loss", "content": 0.00544664915651083, "timestamp": "2025-09-10 02:32:41.370439", "step": 6596, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.405339", "step": 6596, "epoch": 3 }, { "type": "loss", "content": 0.0012348754098638892, "timestamp": "2025-09-10 02:32:41.407218", "step": 6597, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:41.440258", "step": 6597, "epoch": 3 }, { "type": "loss", "content": 0.00028111092979088426, "timestamp": "2025-09-10 02:32:41.442254", "step": 6598, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:41.476126", "step": 6598, "epoch": 3 }, { "type": "loss", "content": 0.0005132206133566797, "timestamp": "2025-09-10 02:32:41.478297", "step": 6599, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.512238", "step": 6599, "epoch": 3 }, { "type": "loss", "content": 0.0005630544037558138, "timestamp": "2025-09-10 02:32:41.535573", "step": 6600, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.568207", "step": 6600, "epoch": 3 }, { "type": "loss", "content": 0.00023868770222179592, "timestamp": "2025-09-10 02:32:41.569980", "step": 6601, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.603702", "step": 6601, "epoch": 3 }, { "type": "loss", "content": 0.0003321226977277547, "timestamp": "2025-09-10 02:32:41.605795", "step": 6602, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:41.639179", "step": 6602, "epoch": 3 }, { "type": "loss", "content": 0.000865118287038058, "timestamp": "2025-09-10 02:32:41.641142", "step": 6603, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.674160", "step": 6603, "epoch": 3 }, { "type": "loss", "content": 0.0004023853980470449, "timestamp": "2025-09-10 02:32:41.697349", "step": 6604, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:41.735098", "step": 6604, "epoch": 3 }, { "type": "loss", "content": 0.0004497812769841403, "timestamp": "2025-09-10 02:32:41.737087", "step": 6605, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:41.774016", "step": 6605, "epoch": 3 }, { "type": "loss", "content": 0.0006717675132676959, "timestamp": "2025-09-10 02:32:41.775665", "step": 6606, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.813970", "step": 6606, "epoch": 3 }, { "type": "loss", "content": 0.0006070249364711344, "timestamp": "2025-09-10 02:32:41.815776", "step": 6607, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.844643", "step": 6607, "epoch": 3 }, { "type": "loss", "content": 0.0029559044633060694, "timestamp": "2025-09-10 02:32:41.868019", "step": 6608, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:41.897415", "step": 6608, "epoch": 3 }, { "type": "loss", "content": 0.0004757153510581702, "timestamp": "2025-09-10 02:32:41.899334", "step": 6609, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.928411", "step": 6609, "epoch": 3 }, { "type": "loss", "content": 0.0022335194516927004, "timestamp": "2025-09-10 02:32:41.930182", "step": 6610, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.959185", "step": 6610, "epoch": 3 }, { "type": "loss", "content": 0.0005463598063215613, "timestamp": "2025-09-10 02:32:41.961073", "step": 6611, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:41.989945", "step": 6611, "epoch": 3 }, { "type": "loss", "content": 0.0003097263688687235, "timestamp": "2025-09-10 02:32:42.013333", "step": 6612, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:42.042882", "step": 6612, "epoch": 3 }, { "type": "loss", "content": 0.0001371066173305735, "timestamp": "2025-09-10 02:32:42.044964", "step": 6613, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.073937", "step": 6613, "epoch": 3 }, { "type": "loss", "content": 0.0014303690986707807, "timestamp": "2025-09-10 02:32:42.075792", "step": 6614, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.105205", "step": 6614, "epoch": 3 }, { "type": "loss", "content": 0.00017707170627545565, "timestamp": "2025-09-10 02:32:42.107216", "step": 6615, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.135925", "step": 6615, "epoch": 3 }, { "type": "loss", "content": 0.0003356249653734267, "timestamp": "2025-09-10 02:32:42.159272", "step": 6616, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.188698", "step": 6616, "epoch": 3 }, { "type": "loss", "content": 0.00032328421366401017, "timestamp": "2025-09-10 02:32:42.190854", "step": 6617, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.220122", "step": 6617, "epoch": 3 }, { "type": "loss", "content": 9.965641220333055e-05, "timestamp": "2025-09-10 02:32:42.221945", "step": 6618, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.250890", "step": 6618, "epoch": 3 }, { "type": "loss", "content": 0.0001957298518391326, "timestamp": "2025-09-10 02:32:42.252947", "step": 6619, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.281997", "step": 6619, "epoch": 3 }, { "type": "loss", "content": 0.013988674618303776, "timestamp": "2025-09-10 02:32:42.305367", "step": 6620, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.334830", "step": 6620, "epoch": 3 }, { "type": "loss", "content": 0.010370716452598572, "timestamp": "2025-09-10 02:32:42.336897", "step": 6621, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.365498", "step": 6621, "epoch": 3 }, { "type": "loss", "content": 0.004063849337399006, "timestamp": "2025-09-10 02:32:42.367526", "step": 6622, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.396707", "step": 6622, "epoch": 3 }, { "type": "loss", "content": 0.007541103754192591, "timestamp": "2025-09-10 02:32:42.398476", "step": 6623, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:42.427639", "step": 6623, "epoch": 3 }, { "type": "loss", "content": 0.0002139332063961774, "timestamp": "2025-09-10 02:32:42.451085", "step": 6624, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.480225", "step": 6624, "epoch": 3 }, { "type": "loss", "content": 0.00032632803777232766, "timestamp": "2025-09-10 02:32:42.481878", "step": 6625, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.510973", "step": 6625, "epoch": 3 }, { "type": "loss", "content": 0.004863189999014139, "timestamp": "2025-09-10 02:32:42.513305", "step": 6626, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.542756", "step": 6626, "epoch": 3 }, { "type": "loss", "content": 0.0023307842202484608, "timestamp": "2025-09-10 02:32:42.545066", "step": 6627, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.574190", "step": 6627, "epoch": 3 }, { "type": "loss", "content": 0.002439223462715745, "timestamp": "2025-09-10 02:32:42.598154", "step": 6628, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.627987", "step": 6628, "epoch": 3 }, { "type": "loss", "content": 0.0015195738524198532, "timestamp": "2025-09-10 02:32:42.630059", "step": 6629, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.660475", "step": 6629, "epoch": 3 }, { "type": "loss", "content": 0.0012575994478538632, "timestamp": "2025-09-10 02:32:42.662298", "step": 6630, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:42.691610", "step": 6630, "epoch": 3 }, { "type": "loss", "content": 0.0001379108871333301, "timestamp": "2025-09-10 02:32:42.693851", "step": 6631, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.723295", "step": 6631, "epoch": 3 }, { "type": "loss", "content": 0.0009422508883289993, "timestamp": "2025-09-10 02:32:42.746680", "step": 6632, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.778011", "step": 6632, "epoch": 3 }, { "type": "loss", "content": 0.00043333551730029285, "timestamp": "2025-09-10 02:32:42.779952", "step": 6633, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.815015", "step": 6633, "epoch": 3 }, { "type": "loss", "content": 0.0011779938358813524, "timestamp": "2025-09-10 02:32:42.817019", "step": 6634, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.846508", "step": 6634, "epoch": 3 }, { "type": "loss", "content": 0.0008157030097208917, "timestamp": "2025-09-10 02:32:42.848852", "step": 6635, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.877699", "step": 6635, "epoch": 3 }, { "type": "loss", "content": 0.0035045649856328964, "timestamp": "2025-09-10 02:32:42.901480", "step": 6636, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.930902", "step": 6636, "epoch": 3 }, { "type": "loss", "content": 0.0007069081766530871, "timestamp": "2025-09-10 02:32:42.933083", "step": 6637, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:42.962139", "step": 6637, "epoch": 3 }, { "type": "loss", "content": 0.00015712481399532408, "timestamp": "2025-09-10 02:32:42.964033", "step": 6638, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:42.992991", "step": 6638, "epoch": 3 }, { "type": "loss", "content": 0.000399542972445488, "timestamp": "2025-09-10 02:32:42.994806", "step": 6639, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.024557", "step": 6639, "epoch": 3 }, { "type": "loss", "content": 0.0020209425128996372, "timestamp": "2025-09-10 02:32:43.048111", "step": 6640, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.076830", "step": 6640, "epoch": 3 }, { "type": "loss", "content": 0.0005273325950838625, "timestamp": "2025-09-10 02:32:43.078963", "step": 6641, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.107792", "step": 6641, "epoch": 3 }, { "type": "loss", "content": 0.0003790934570133686, "timestamp": "2025-09-10 02:32:43.109667", "step": 6642, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.138547", "step": 6642, "epoch": 3 }, { "type": "loss", "content": 0.00045166281051933765, "timestamp": "2025-09-10 02:32:43.140675", "step": 6643, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.169541", "step": 6643, "epoch": 3 }, { "type": "loss", "content": 0.0004468052356969565, "timestamp": "2025-09-10 02:32:43.192909", "step": 6644, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.222133", "step": 6644, "epoch": 3 }, { "type": "loss", "content": 0.0004113587492611259, "timestamp": "2025-09-10 02:32:43.224060", "step": 6645, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.257499", "step": 6645, "epoch": 3 }, { "type": "loss", "content": 0.001634429907426238, "timestamp": "2025-09-10 02:32:43.259508", "step": 6646, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.289281", "step": 6646, "epoch": 3 }, { "type": "loss", "content": 0.00041421657078899443, "timestamp": "2025-09-10 02:32:43.291259", "step": 6647, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:43.320480", "step": 6647, "epoch": 3 }, { "type": "loss", "content": 0.0002726706152316183, "timestamp": "2025-09-10 02:32:43.344449", "step": 6648, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.375906", "step": 6648, "epoch": 3 }, { "type": "loss", "content": 0.003396217245608568, "timestamp": "2025-09-10 02:32:43.378085", "step": 6649, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.412597", "step": 6649, "epoch": 3 }, { "type": "loss", "content": 0.0006525564240291715, "timestamp": "2025-09-10 02:32:43.414876", "step": 6650, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.446559", "step": 6650, "epoch": 3 }, { "type": "loss", "content": 0.05705555900931358, "timestamp": "2025-09-10 02:32:43.448604", "step": 6651, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.482261", "step": 6651, "epoch": 3 }, { "type": "loss", "content": 0.00020800225320272148, "timestamp": "2025-09-10 02:32:43.505680", "step": 6652, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.536776", "step": 6652, "epoch": 3 }, { "type": "loss", "content": 0.00018046969489660114, "timestamp": "2025-09-10 02:32:43.538675", "step": 6653, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:43.571848", "step": 6653, "epoch": 3 }, { "type": "loss", "content": 0.0005861817044205964, "timestamp": "2025-09-10 02:32:43.574026", "step": 6654, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.607622", "step": 6654, "epoch": 3 }, { "type": "loss", "content": 0.0003875174734275788, "timestamp": "2025-09-10 02:32:43.610103", "step": 6655, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.642519", "step": 6655, "epoch": 3 }, { "type": "loss", "content": 0.00019350463117007166, "timestamp": "2025-09-10 02:32:43.666160", "step": 6656, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:43.704337", "step": 6656, "epoch": 3 }, { "type": "loss", "content": 0.00039134820690378547, "timestamp": "2025-09-10 02:32:43.706236", "step": 6657, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:43.744883", "step": 6657, "epoch": 3 }, { "type": "loss", "content": 0.0003388922195881605, "timestamp": "2025-09-10 02:32:43.746963", "step": 6658, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.782195", "step": 6658, "epoch": 3 }, { "type": "loss", "content": 0.0007815192802809179, "timestamp": "2025-09-10 02:32:43.784291", "step": 6659, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.819507", "step": 6659, "epoch": 3 }, { "type": "loss", "content": 0.0001469071430619806, "timestamp": "2025-09-10 02:32:43.843176", "step": 6660, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.873085", "step": 6660, "epoch": 3 }, { "type": "loss", "content": 0.00031934047001414, "timestamp": "2025-09-10 02:32:43.875043", "step": 6661, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:43.904140", "step": 6661, "epoch": 3 }, { "type": "loss", "content": 0.0002618095313664526, "timestamp": "2025-09-10 02:32:43.906259", "step": 6662, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:43.936075", "step": 6662, "epoch": 3 }, { "type": "loss", "content": 0.0004075879987794906, "timestamp": "2025-09-10 02:32:43.938038", "step": 6663, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:43.967028", "step": 6663, "epoch": 3 }, { "type": "loss", "content": 0.00012331274047028273, "timestamp": "2025-09-10 02:32:43.990803", "step": 6664, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.020118", "step": 6664, "epoch": 3 }, { "type": "loss", "content": 0.0001246247411472723, "timestamp": "2025-09-10 02:32:44.022686", "step": 6665, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.052044", "step": 6665, "epoch": 3 }, { "type": "loss", "content": 0.008284551091492176, "timestamp": "2025-09-10 02:32:44.053949", "step": 6666, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.082876", "step": 6666, "epoch": 3 }, { "type": "loss", "content": 0.0006869097123853862, "timestamp": "2025-09-10 02:32:44.084776", "step": 6667, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.113528", "step": 6667, "epoch": 3 }, { "type": "loss", "content": 0.021662766113877296, "timestamp": "2025-09-10 02:32:44.137101", "step": 6668, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.166537", "step": 6668, "epoch": 3 }, { "type": "loss", "content": 0.0006537624867632985, "timestamp": "2025-09-10 02:32:44.168617", "step": 6669, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.197870", "step": 6669, "epoch": 3 }, { "type": "loss", "content": 0.00017999236297328025, "timestamp": "2025-09-10 02:32:44.200010", "step": 6670, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:44.228986", "step": 6670, "epoch": 3 }, { "type": "loss", "content": 8.588766650063917e-05, "timestamp": "2025-09-10 02:32:44.231128", "step": 6671, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.261322", "step": 6671, "epoch": 3 }, { "type": "loss", "content": 0.0011683589546009898, "timestamp": "2025-09-10 02:32:44.284904", "step": 6672, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.314054", "step": 6672, "epoch": 3 }, { "type": "loss", "content": 0.0007778553408570588, "timestamp": "2025-09-10 02:32:44.316823", "step": 6673, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.347732", "step": 6673, "epoch": 3 }, { "type": "loss", "content": 0.0004378134326543659, "timestamp": "2025-09-10 02:32:44.349887", "step": 6674, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.380148", "step": 6674, "epoch": 3 }, { "type": "loss", "content": 0.010837412439286709, "timestamp": "2025-09-10 02:32:44.382113", "step": 6675, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:44.411187", "step": 6675, "epoch": 3 }, { "type": "loss", "content": 0.0008242077310569584, "timestamp": "2025-09-10 02:32:44.434681", "step": 6676, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:44.472534", "step": 6676, "epoch": 3 }, { "type": "loss", "content": 0.004866982344537973, "timestamp": "2025-09-10 02:32:44.474381", "step": 6677, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.504050", "step": 6677, "epoch": 3 }, { "type": "loss", "content": 0.00041917446651495993, "timestamp": "2025-09-10 02:32:44.506377", "step": 6678, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.535753", "step": 6678, "epoch": 3 }, { "type": "loss", "content": 0.00038965611020103097, "timestamp": "2025-09-10 02:32:44.537907", "step": 6679, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.566724", "step": 6679, "epoch": 3 }, { "type": "loss", "content": 0.017665987834334373, "timestamp": "2025-09-10 02:32:44.590396", "step": 6680, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.619671", "step": 6680, "epoch": 3 }, { "type": "loss", "content": 0.0001284900208702311, "timestamp": "2025-09-10 02:32:44.621786", "step": 6681, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.650665", "step": 6681, "epoch": 3 }, { "type": "loss", "content": 0.00014236057177186012, "timestamp": "2025-09-10 02:32:44.652852", "step": 6682, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.682151", "step": 6682, "epoch": 3 }, { "type": "loss", "content": 0.00045861300895921886, "timestamp": "2025-09-10 02:32:44.684166", "step": 6683, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.713031", "step": 6683, "epoch": 3 }, { "type": "loss", "content": 0.00014278704475145787, "timestamp": "2025-09-10 02:32:44.736796", "step": 6684, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.767318", "step": 6684, "epoch": 3 }, { "type": "loss", "content": 0.00020633505482692271, "timestamp": "2025-09-10 02:32:44.769420", "step": 6685, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.801789", "step": 6685, "epoch": 3 }, { "type": "loss", "content": 0.00016045241500250995, "timestamp": "2025-09-10 02:32:44.803672", "step": 6686, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.832409", "step": 6686, "epoch": 3 }, { "type": "loss", "content": 0.0002670391113497317, "timestamp": "2025-09-10 02:32:44.834991", "step": 6687, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:44.863688", "step": 6687, "epoch": 3 }, { "type": "loss", "content": 0.000776700850110501, "timestamp": "2025-09-10 02:32:44.887236", "step": 6688, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:32:46.773454", "step": 6688, "epoch": 3 }, { "type": "pplx", "content": 2634816.097719576, "timestamp": "2025-09-10 02:32:46.776015", "step": 6688, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:46.808653", "step": 6688, "epoch": 3 }, { "type": "loss", "content": 0.0011966631282120943, "timestamp": "2025-09-10 02:32:46.810605", "step": 6689, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:46.839851", "step": 6689, "epoch": 3 }, { "type": "loss", "content": 0.001301180454902351, "timestamp": "2025-09-10 02:32:46.841931", "step": 6690, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:46.870836", "step": 6690, "epoch": 3 }, { "type": "loss", "content": 0.00025518672191537917, "timestamp": "2025-09-10 02:32:46.872574", "step": 6691, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:46.901876", "step": 6691, "epoch": 3 }, { "type": "loss", "content": 0.00013390577805694193, "timestamp": "2025-09-10 02:32:46.926006", "step": 6692, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:46.955461", "step": 6692, "epoch": 3 }, { "type": "loss", "content": 0.002019841456785798, "timestamp": "2025-09-10 02:32:46.957657", "step": 6693, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:46.986484", "step": 6693, "epoch": 3 }, { "type": "loss", "content": 0.000430583517299965, "timestamp": "2025-09-10 02:32:46.988472", "step": 6694, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.018352", "step": 6694, "epoch": 3 }, { "type": "loss", "content": 0.00022048897517379373, "timestamp": "2025-09-10 02:32:47.020143", "step": 6695, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:47.049252", "step": 6695, "epoch": 3 }, { "type": "loss", "content": 0.009212544187903404, "timestamp": "2025-09-10 02:32:47.072661", "step": 6696, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:47.101348", "step": 6696, "epoch": 3 }, { "type": "loss", "content": 0.0011059354292228818, "timestamp": "2025-09-10 02:32:47.103412", "step": 6697, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.132522", "step": 6697, "epoch": 3 }, { "type": "loss", "content": 0.0013964826939627528, "timestamp": "2025-09-10 02:32:47.134236", "step": 6698, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.163221", "step": 6698, "epoch": 3 }, { "type": "loss", "content": 0.002260487526655197, "timestamp": "2025-09-10 02:32:47.165263", "step": 6699, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.194402", "step": 6699, "epoch": 3 }, { "type": "loss", "content": 0.0012373748468235135, "timestamp": "2025-09-10 02:32:47.217740", "step": 6700, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.251024", "step": 6700, "epoch": 3 }, { "type": "loss", "content": 0.00034490125835873187, "timestamp": "2025-09-10 02:32:47.253141", "step": 6701, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.283566", "step": 6701, "epoch": 3 }, { "type": "loss", "content": 0.00042592009413056076, "timestamp": "2025-09-10 02:32:47.285340", "step": 6702, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.315568", "step": 6702, "epoch": 3 }, { "type": "loss", "content": 7.375200948445126e-05, "timestamp": "2025-09-10 02:32:47.317531", "step": 6703, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.351570", "step": 6703, "epoch": 3 }, { "type": "loss", "content": 0.0007520223734900355, "timestamp": "2025-09-10 02:32:47.374923", "step": 6704, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.409795", "step": 6704, "epoch": 3 }, { "type": "loss", "content": 0.006365684326738119, "timestamp": "2025-09-10 02:32:47.411539", "step": 6705, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.443914", "step": 6705, "epoch": 3 }, { "type": "loss", "content": 0.014979584142565727, "timestamp": "2025-09-10 02:32:47.446028", "step": 6706, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.478362", "step": 6706, "epoch": 3 }, { "type": "loss", "content": 0.0001600277901161462, "timestamp": "2025-09-10 02:32:47.480782", "step": 6707, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.513289", "step": 6707, "epoch": 3 }, { "type": "loss", "content": 0.0006272908649407327, "timestamp": "2025-09-10 02:32:47.536742", "step": 6708, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.569708", "step": 6708, "epoch": 3 }, { "type": "loss", "content": 0.002627496374770999, "timestamp": "2025-09-10 02:32:47.571837", "step": 6709, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.605615", "step": 6709, "epoch": 3 }, { "type": "loss", "content": 6.385224696714431e-05, "timestamp": "2025-09-10 02:32:47.607902", "step": 6710, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.642766", "step": 6710, "epoch": 3 }, { "type": "loss", "content": 0.0007850642432458699, "timestamp": "2025-09-10 02:32:47.644590", "step": 6711, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:47.681506", "step": 6711, "epoch": 3 }, { "type": "loss", "content": 9.543353371554986e-05, "timestamp": "2025-09-10 02:32:47.704790", "step": 6712, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.742055", "step": 6712, "epoch": 3 }, { "type": "loss", "content": 0.00014451451716013253, "timestamp": "2025-09-10 02:32:47.745163", "step": 6713, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:47.779304", "step": 6713, "epoch": 3 }, { "type": "loss", "content": 0.007798864506185055, "timestamp": "2025-09-10 02:32:47.781069", "step": 6714, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:47.817635", "step": 6714, "epoch": 3 }, { "type": "loss", "content": 0.0004212721833027899, "timestamp": "2025-09-10 02:32:47.819793", "step": 6715, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:47.848957", "step": 6715, "epoch": 3 }, { "type": "loss", "content": 0.0014056451618671417, "timestamp": "2025-09-10 02:32:47.872700", "step": 6716, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.901858", "step": 6716, "epoch": 3 }, { "type": "loss", "content": 0.00029630190692842007, "timestamp": "2025-09-10 02:32:47.904792", "step": 6717, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.935506", "step": 6717, "epoch": 3 }, { "type": "loss", "content": 0.00025604027905501425, "timestamp": "2025-09-10 02:32:47.937638", "step": 6718, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.966390", "step": 6718, "epoch": 3 }, { "type": "loss", "content": 6.539438618347049e-05, "timestamp": "2025-09-10 02:32:47.968206", "step": 6719, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:47.997045", "step": 6719, "epoch": 3 }, { "type": "loss", "content": 0.00022632161562796682, "timestamp": "2025-09-10 02:32:48.020360", "step": 6720, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:48.049654", "step": 6720, "epoch": 3 }, { "type": "loss", "content": 0.0006399400299414992, "timestamp": "2025-09-10 02:32:48.051540", "step": 6721, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:48.080073", "step": 6721, "epoch": 3 }, { "type": "loss", "content": 0.00018006419122684747, "timestamp": "2025-09-10 02:32:48.081967", "step": 6722, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.110591", "step": 6722, "epoch": 3 }, { "type": "loss", "content": 0.00041571405017748475, "timestamp": "2025-09-10 02:32:48.112251", "step": 6723, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.141058", "step": 6723, "epoch": 3 }, { "type": "loss", "content": 0.02812386117875576, "timestamp": "2025-09-10 02:32:48.164605", "step": 6724, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.194003", "step": 6724, "epoch": 3 }, { "type": "loss", "content": 0.0004604542045854032, "timestamp": "2025-09-10 02:32:48.195892", "step": 6725, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.224642", "step": 6725, "epoch": 3 }, { "type": "loss", "content": 0.0003610174753703177, "timestamp": "2025-09-10 02:32:48.226406", "step": 6726, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.255245", "step": 6726, "epoch": 3 }, { "type": "loss", "content": 0.0011343618389219046, "timestamp": "2025-09-10 02:32:48.256988", "step": 6727, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.285554", "step": 6727, "epoch": 3 }, { "type": "loss", "content": 7.726944022579119e-05, "timestamp": "2025-09-10 02:32:48.308801", "step": 6728, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.337271", "step": 6728, "epoch": 3 }, { "type": "loss", "content": 0.0002777362533379346, "timestamp": "2025-09-10 02:32:48.339053", "step": 6729, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.368360", "step": 6729, "epoch": 3 }, { "type": "loss", "content": 8.940829138737172e-05, "timestamp": "2025-09-10 02:32:48.370496", "step": 6730, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.400395", "step": 6730, "epoch": 3 }, { "type": "loss", "content": 0.0007924687815830112, "timestamp": "2025-09-10 02:32:48.402189", "step": 6731, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.430994", "step": 6731, "epoch": 3 }, { "type": "loss", "content": 0.0001112060053856112, "timestamp": "2025-09-10 02:32:48.455663", "step": 6732, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.484326", "step": 6732, "epoch": 3 }, { "type": "loss", "content": 0.007602198980748653, "timestamp": "2025-09-10 02:32:48.485950", "step": 6733, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.514409", "step": 6733, "epoch": 3 }, { "type": "loss", "content": 0.00442105857655406, "timestamp": "2025-09-10 02:32:48.516385", "step": 6734, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.545639", "step": 6734, "epoch": 3 }, { "type": "loss", "content": 0.00020948487508576363, "timestamp": "2025-09-10 02:32:48.547564", "step": 6735, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.576704", "step": 6735, "epoch": 3 }, { "type": "loss", "content": 0.00018643557268660516, "timestamp": "2025-09-10 02:32:48.599729", "step": 6736, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.629362", "step": 6736, "epoch": 3 }, { "type": "loss", "content": 0.0019744031596928835, "timestamp": "2025-09-10 02:32:48.631682", "step": 6737, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.660652", "step": 6737, "epoch": 3 }, { "type": "loss", "content": 0.0001240462443092838, "timestamp": "2025-09-10 02:32:48.662601", "step": 6738, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:48.692886", "step": 6738, "epoch": 3 }, { "type": "loss", "content": 5.261231126496568e-05, "timestamp": "2025-09-10 02:32:48.694527", "step": 6739, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.723214", "step": 6739, "epoch": 3 }, { "type": "loss", "content": 0.00011474742495920509, "timestamp": "2025-09-10 02:32:48.746600", "step": 6740, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.777605", "step": 6740, "epoch": 3 }, { "type": "loss", "content": 5.554107701755129e-05, "timestamp": "2025-09-10 02:32:48.779672", "step": 6741, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.814010", "step": 6741, "epoch": 3 }, { "type": "loss", "content": 0.00044761571916751564, "timestamp": "2025-09-10 02:32:48.815753", "step": 6742, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.844806", "step": 6742, "epoch": 3 }, { "type": "loss", "content": 9.237445192411542e-05, "timestamp": "2025-09-10 02:32:48.846498", "step": 6743, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:48.875127", "step": 6743, "epoch": 3 }, { "type": "loss", "content": 7.44791905162856e-05, "timestamp": "2025-09-10 02:32:48.898776", "step": 6744, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:48.928586", "step": 6744, "epoch": 3 }, { "type": "loss", "content": 0.0011504106223583221, "timestamp": "2025-09-10 02:32:48.930547", "step": 6745, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:48.959523", "step": 6745, "epoch": 3 }, { "type": "loss", "content": 0.0004857448220718652, "timestamp": "2025-09-10 02:32:48.961239", "step": 6746, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:48.989967", "step": 6746, "epoch": 3 }, { "type": "loss", "content": 0.00015927580534480512, "timestamp": "2025-09-10 02:32:48.991709", "step": 6747, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:49.021478", "step": 6747, "epoch": 3 }, { "type": "loss", "content": 0.0001082676462829113, "timestamp": "2025-09-10 02:32:49.044731", "step": 6748, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.073802", "step": 6748, "epoch": 3 }, { "type": "loss", "content": 0.00010946422844426706, "timestamp": "2025-09-10 02:32:49.075497", "step": 6749, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.103969", "step": 6749, "epoch": 3 }, { "type": "loss", "content": 0.0020970015320926905, "timestamp": "2025-09-10 02:32:49.105757", "step": 6750, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:49.134879", "step": 6750, "epoch": 3 }, { "type": "loss", "content": 0.0008264260250143707, "timestamp": "2025-09-10 02:32:49.136488", "step": 6751, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.165267", "step": 6751, "epoch": 3 }, { "type": "loss", "content": 0.00023407131084240973, "timestamp": "2025-09-10 02:32:49.188596", "step": 6752, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.218145", "step": 6752, "epoch": 3 }, { "type": "loss", "content": 0.0007170811877585948, "timestamp": "2025-09-10 02:32:49.219827", "step": 6753, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.253911", "step": 6753, "epoch": 3 }, { "type": "loss", "content": 0.00010154004121432081, "timestamp": "2025-09-10 02:32:49.255980", "step": 6754, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.285659", "step": 6754, "epoch": 3 }, { "type": "loss", "content": 0.00010521234798943624, "timestamp": "2025-09-10 02:32:49.287228", "step": 6755, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.317031", "step": 6755, "epoch": 3 }, { "type": "loss", "content": 6.685069820377976e-05, "timestamp": "2025-09-10 02:32:49.340383", "step": 6756, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.370267", "step": 6756, "epoch": 3 }, { "type": "loss", "content": 0.0010209261672571301, "timestamp": "2025-09-10 02:32:49.373652", "step": 6757, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.409611", "step": 6757, "epoch": 3 }, { "type": "loss", "content": 0.0022340952418744564, "timestamp": "2025-09-10 02:32:49.411537", "step": 6758, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.444188", "step": 6758, "epoch": 3 }, { "type": "loss", "content": 0.00013346783816814423, "timestamp": "2025-09-10 02:32:49.446253", "step": 6759, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:49.478474", "step": 6759, "epoch": 3 }, { "type": "loss", "content": 0.0002406542480457574, "timestamp": "2025-09-10 02:32:49.502618", "step": 6760, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.534866", "step": 6760, "epoch": 3 }, { "type": "loss", "content": 6.465790647780523e-05, "timestamp": "2025-09-10 02:32:49.536769", "step": 6761, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.568879", "step": 6761, "epoch": 3 }, { "type": "loss", "content": 0.0006072352989576757, "timestamp": "2025-09-10 02:32:49.570545", "step": 6762, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:49.605203", "step": 6762, "epoch": 3 }, { "type": "loss", "content": 5.9133617469342425e-05, "timestamp": "2025-09-10 02:32:49.607480", "step": 6763, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.641949", "step": 6763, "epoch": 3 }, { "type": "loss", "content": 0.0001183756030513905, "timestamp": "2025-09-10 02:32:49.665475", "step": 6764, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.703370", "step": 6764, "epoch": 3 }, { "type": "loss", "content": 3.824655505013652e-05, "timestamp": "2025-09-10 02:32:49.705081", "step": 6765, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.744919", "step": 6765, "epoch": 3 }, { "type": "loss", "content": 0.0017311256378889084, "timestamp": "2025-09-10 02:32:49.746903", "step": 6766, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.782135", "step": 6766, "epoch": 3 }, { "type": "loss", "content": 7.348317740252241e-05, "timestamp": "2025-09-10 02:32:49.783845", "step": 6767, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.820374", "step": 6767, "epoch": 3 }, { "type": "loss", "content": 0.009175836108624935, "timestamp": "2025-09-10 02:32:49.844029", "step": 6768, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.873488", "step": 6768, "epoch": 3 }, { "type": "loss", "content": 0.0003108904347755015, "timestamp": "2025-09-10 02:32:49.875733", "step": 6769, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.908957", "step": 6769, "epoch": 3 }, { "type": "loss", "content": 0.0001275867980439216, "timestamp": "2025-09-10 02:32:49.910739", "step": 6770, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:49.939557", "step": 6770, "epoch": 3 }, { "type": "loss", "content": 0.0001226141321239993, "timestamp": "2025-09-10 02:32:49.941820", "step": 6771, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:49.970800", "step": 6771, "epoch": 3 }, { "type": "loss", "content": 0.00014986835594754666, "timestamp": "2025-09-10 02:32:49.994859", "step": 6772, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.023459", "step": 6772, "epoch": 3 }, { "type": "loss", "content": 0.0015205388190224767, "timestamp": "2025-09-10 02:32:50.025295", "step": 6773, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.054534", "step": 6773, "epoch": 3 }, { "type": "loss", "content": 0.00012730220623780042, "timestamp": "2025-09-10 02:32:50.056541", "step": 6774, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.085130", "step": 6774, "epoch": 3 }, { "type": "loss", "content": 5.468766175908968e-05, "timestamp": "2025-09-10 02:32:50.087955", "step": 6775, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:50.117227", "step": 6775, "epoch": 3 }, { "type": "loss", "content": 0.00021866762835998088, "timestamp": "2025-09-10 02:32:50.141465", "step": 6776, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.171225", "step": 6776, "epoch": 3 }, { "type": "loss", "content": 7.189018651843071e-05, "timestamp": "2025-09-10 02:32:50.173282", "step": 6777, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:50.202948", "step": 6777, "epoch": 3 }, { "type": "loss", "content": 0.00012346556468401104, "timestamp": "2025-09-10 02:32:50.205449", "step": 6778, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.235533", "step": 6778, "epoch": 3 }, { "type": "loss", "content": 0.005750373005867004, "timestamp": "2025-09-10 02:32:50.237940", "step": 6779, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.267609", "step": 6779, "epoch": 3 }, { "type": "loss", "content": 0.0001830299006542191, "timestamp": "2025-09-10 02:32:50.291515", "step": 6780, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:50.321349", "step": 6780, "epoch": 3 }, { "type": "loss", "content": 0.00012977873848285526, "timestamp": "2025-09-10 02:32:50.323390", "step": 6781, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:50.352546", "step": 6781, "epoch": 3 }, { "type": "loss", "content": 0.0009929948719218373, "timestamp": "2025-09-10 02:32:50.355368", "step": 6782, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.384729", "step": 6782, "epoch": 3 }, { "type": "loss", "content": 0.057896532118320465, "timestamp": "2025-09-10 02:32:50.386951", "step": 6783, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.416111", "step": 6783, "epoch": 3 }, { "type": "loss", "content": 8.889515447663143e-05, "timestamp": "2025-09-10 02:32:50.439572", "step": 6784, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.468594", "step": 6784, "epoch": 3 }, { "type": "loss", "content": 0.0056881383061409, "timestamp": "2025-09-10 02:32:50.470667", "step": 6785, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.499527", "step": 6785, "epoch": 3 }, { "type": "loss", "content": 0.0003143739595543593, "timestamp": "2025-09-10 02:32:50.501432", "step": 6786, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.531118", "step": 6786, "epoch": 3 }, { "type": "loss", "content": 0.0001031060965033248, "timestamp": "2025-09-10 02:32:50.533137", "step": 6787, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.562618", "step": 6787, "epoch": 3 }, { "type": "loss", "content": 0.007194445002824068, "timestamp": "2025-09-10 02:32:50.586210", "step": 6788, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.615536", "step": 6788, "epoch": 3 }, { "type": "loss", "content": 9.384167788084596e-05, "timestamp": "2025-09-10 02:32:50.617392", "step": 6789, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:50.646680", "step": 6789, "epoch": 3 }, { "type": "loss", "content": 5.198954386287369e-05, "timestamp": "2025-09-10 02:32:50.648891", "step": 6790, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.678812", "step": 6790, "epoch": 3 }, { "type": "loss", "content": 0.00022589776199311018, "timestamp": "2025-09-10 02:32:50.680809", "step": 6791, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.709765", "step": 6791, "epoch": 3 }, { "type": "loss", "content": 9.448708442505449e-05, "timestamp": "2025-09-10 02:32:50.733374", "step": 6792, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.762839", "step": 6792, "epoch": 3 }, { "type": "loss", "content": 0.00011150127829751, "timestamp": "2025-09-10 02:32:50.764783", "step": 6793, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:50.796887", "step": 6793, "epoch": 3 }, { "type": "loss", "content": 0.0001500972721260041, "timestamp": "2025-09-10 02:32:50.798827", "step": 6794, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.828818", "step": 6794, "epoch": 3 }, { "type": "loss", "content": 5.231571412878111e-05, "timestamp": "2025-09-10 02:32:50.831029", "step": 6795, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.860183", "step": 6795, "epoch": 3 }, { "type": "loss", "content": 0.0001222932041855529, "timestamp": "2025-09-10 02:32:50.884457", "step": 6796, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.916887", "step": 6796, "epoch": 3 }, { "type": "loss", "content": 7.24709389032796e-05, "timestamp": "2025-09-10 02:32:50.919035", "step": 6797, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.949587", "step": 6797, "epoch": 3 }, { "type": "loss", "content": 0.00015622669889125973, "timestamp": "2025-09-10 02:32:50.951594", "step": 6798, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:50.980638", "step": 6798, "epoch": 3 }, { "type": "loss", "content": 0.00018378280219621956, "timestamp": "2025-09-10 02:32:50.982808", "step": 6799, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.012392", "step": 6799, "epoch": 3 }, { "type": "loss", "content": 0.0005293177091516554, "timestamp": "2025-09-10 02:32:51.035914", "step": 6800, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:51.065087", "step": 6800, "epoch": 3 }, { "type": "loss", "content": 0.004148623440414667, "timestamp": "2025-09-10 02:32:51.067138", "step": 6801, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.095993", "step": 6801, "epoch": 3 }, { "type": "loss", "content": 0.02354315109550953, "timestamp": "2025-09-10 02:32:51.098079", "step": 6802, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.126948", "step": 6802, "epoch": 3 }, { "type": "loss", "content": 0.00010290476348018274, "timestamp": "2025-09-10 02:32:51.129211", "step": 6803, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.158673", "step": 6803, "epoch": 3 }, { "type": "loss", "content": 3.785776789300144e-05, "timestamp": "2025-09-10 02:32:51.182185", "step": 6804, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.211353", "step": 6804, "epoch": 3 }, { "type": "loss", "content": 3.411351644899696e-05, "timestamp": "2025-09-10 02:32:51.213562", "step": 6805, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.247457", "step": 6805, "epoch": 3 }, { "type": "loss", "content": 0.000211441089049913, "timestamp": "2025-09-10 02:32:51.249652", "step": 6806, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.281336", "step": 6806, "epoch": 3 }, { "type": "loss", "content": 0.0008072297205217183, "timestamp": "2025-09-10 02:32:51.283381", "step": 6807, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.314149", "step": 6807, "epoch": 3 }, { "type": "loss", "content": 0.012630701996386051, "timestamp": "2025-09-10 02:32:51.337681", "step": 6808, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.367604", "step": 6808, "epoch": 3 }, { "type": "loss", "content": 0.0003062119649257511, "timestamp": "2025-09-10 02:32:51.369837", "step": 6809, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.402890", "step": 6809, "epoch": 3 }, { "type": "loss", "content": 4.283956513972953e-05, "timestamp": "2025-09-10 02:32:51.405172", "step": 6810, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.439422", "step": 6810, "epoch": 3 }, { "type": "loss", "content": 0.00022908160462975502, "timestamp": "2025-09-10 02:32:51.441507", "step": 6811, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.474207", "step": 6811, "epoch": 3 }, { "type": "loss", "content": 0.0009302693651989102, "timestamp": "2025-09-10 02:32:51.497803", "step": 6812, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:51.531350", "step": 6812, "epoch": 3 }, { "type": "loss", "content": 0.0002662238839548081, "timestamp": "2025-09-10 02:32:51.533449", "step": 6813, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:51.567134", "step": 6813, "epoch": 3 }, { "type": "loss", "content": 0.0683477595448494, "timestamp": "2025-09-10 02:32:51.569031", "step": 6814, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.603651", "step": 6814, "epoch": 3 }, { "type": "loss", "content": 0.0001606199366506189, "timestamp": "2025-09-10 02:32:51.605947", "step": 6815, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:51.640584", "step": 6815, "epoch": 3 }, { "type": "loss", "content": 0.0006426494219340384, "timestamp": "2025-09-10 02:32:51.664323", "step": 6816, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:51.702821", "step": 6816, "epoch": 3 }, { "type": "loss", "content": 7.264408486662433e-05, "timestamp": "2025-09-10 02:32:51.705071", "step": 6817, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:51.743347", "step": 6817, "epoch": 3 }, { "type": "loss", "content": 6.33458184893243e-05, "timestamp": "2025-09-10 02:32:51.745456", "step": 6818, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.780619", "step": 6818, "epoch": 3 }, { "type": "loss", "content": 8.742232603253797e-05, "timestamp": "2025-09-10 02:32:51.782711", "step": 6819, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.819675", "step": 6819, "epoch": 3 }, { "type": "loss", "content": 9.838306868914515e-05, "timestamp": "2025-09-10 02:32:51.843467", "step": 6820, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:51.872891", "step": 6820, "epoch": 3 }, { "type": "loss", "content": 0.0002053830394288525, "timestamp": "2025-09-10 02:32:51.875024", "step": 6821, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:51.904244", "step": 6821, "epoch": 3 }, { "type": "loss", "content": 4.579432788887061e-05, "timestamp": "2025-09-10 02:32:51.906357", "step": 6822, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:51.935633", "step": 6822, "epoch": 3 }, { "type": "loss", "content": 0.00019780623551923782, "timestamp": "2025-09-10 02:32:51.937825", "step": 6823, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:51.968308", "step": 6823, "epoch": 3 }, { "type": "loss", "content": 0.00027067208429798484, "timestamp": "2025-09-10 02:32:51.992018", "step": 6824, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.026939", "step": 6824, "epoch": 3 }, { "type": "loss", "content": 5.8917088608723134e-05, "timestamp": "2025-09-10 02:32:52.029124", "step": 6825, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.058974", "step": 6825, "epoch": 3 }, { "type": "loss", "content": 0.00013341773592401296, "timestamp": "2025-09-10 02:32:52.061170", "step": 6826, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:52.090966", "step": 6826, "epoch": 3 }, { "type": "loss", "content": 0.0039385221898555756, "timestamp": "2025-09-10 02:32:52.093065", "step": 6827, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.122588", "step": 6827, "epoch": 3 }, { "type": "loss", "content": 6.967805529711768e-05, "timestamp": "2025-09-10 02:32:52.146106", "step": 6828, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:52.176423", "step": 6828, "epoch": 3 }, { "type": "loss", "content": 8.656596764922142e-05, "timestamp": "2025-09-10 02:32:52.178249", "step": 6829, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.207182", "step": 6829, "epoch": 3 }, { "type": "loss", "content": 0.0003680096997413784, "timestamp": "2025-09-10 02:32:52.209075", "step": 6830, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:52.239013", "step": 6830, "epoch": 3 }, { "type": "loss", "content": 0.00017694065172690898, "timestamp": "2025-09-10 02:32:52.241170", "step": 6831, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.270142", "step": 6831, "epoch": 3 }, { "type": "loss", "content": 0.00028491156990639865, "timestamp": "2025-09-10 02:32:52.293973", "step": 6832, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.324242", "step": 6832, "epoch": 3 }, { "type": "loss", "content": 0.00012682615488301963, "timestamp": "2025-09-10 02:32:52.326346", "step": 6833, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.355975", "step": 6833, "epoch": 3 }, { "type": "loss", "content": 0.00012047600466758013, "timestamp": "2025-09-10 02:32:52.357957", "step": 6834, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.386930", "step": 6834, "epoch": 3 }, { "type": "loss", "content": 0.00045988403144292533, "timestamp": "2025-09-10 02:32:52.389019", "step": 6835, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:52.418179", "step": 6835, "epoch": 3 }, { "type": "loss", "content": 0.0002953163639176637, "timestamp": "2025-09-10 02:32:52.441855", "step": 6836, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.471362", "step": 6836, "epoch": 3 }, { "type": "loss", "content": 8.624989277450368e-05, "timestamp": "2025-09-10 02:32:52.473326", "step": 6837, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.502640", "step": 6837, "epoch": 3 }, { "type": "loss", "content": 0.0001436011807527393, "timestamp": "2025-09-10 02:32:52.504843", "step": 6838, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.533797", "step": 6838, "epoch": 3 }, { "type": "loss", "content": 0.004798574838787317, "timestamp": "2025-09-10 02:32:52.535918", "step": 6839, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:52.565034", "step": 6839, "epoch": 3 }, { "type": "loss", "content": 0.0016028692480176687, "timestamp": "2025-09-10 02:32:52.588874", "step": 6840, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:32:54.471541", "step": 6840, "epoch": 3 }, { "type": "pplx", "content": 2506378.380704593, "timestamp": "2025-09-10 02:32:54.473353", "step": 6840, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.501762", "step": 6840, "epoch": 3 }, { "type": "loss", "content": 0.0004131238965783268, "timestamp": "2025-09-10 02:32:54.503360", "step": 6841, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.533279", "step": 6841, "epoch": 3 }, { "type": "loss", "content": 0.002611250150948763, "timestamp": "2025-09-10 02:32:54.535221", "step": 6842, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.564553", "step": 6842, "epoch": 3 }, { "type": "loss", "content": 0.025640210136771202, "timestamp": "2025-09-10 02:32:54.566365", "step": 6843, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.596063", "step": 6843, "epoch": 3 }, { "type": "loss", "content": 0.00010162144462810829, "timestamp": "2025-09-10 02:32:54.619813", "step": 6844, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.650061", "step": 6844, "epoch": 3 }, { "type": "loss", "content": 0.00016020890325307846, "timestamp": "2025-09-10 02:32:54.651682", "step": 6845, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.680443", "step": 6845, "epoch": 3 }, { "type": "loss", "content": 0.0006879153079353273, "timestamp": "2025-09-10 02:32:54.682265", "step": 6846, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.711305", "step": 6846, "epoch": 3 }, { "type": "loss", "content": 0.0020874382462352514, "timestamp": "2025-09-10 02:32:54.713475", "step": 6847, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.742452", "step": 6847, "epoch": 3 }, { "type": "loss", "content": 0.00032064953120425344, "timestamp": "2025-09-10 02:32:54.765954", "step": 6848, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:54.798185", "step": 6848, "epoch": 3 }, { "type": "loss", "content": 7.953395834192634e-05, "timestamp": "2025-09-10 02:32:54.800120", "step": 6849, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.829865", "step": 6849, "epoch": 3 }, { "type": "loss", "content": 0.001108153141103685, "timestamp": "2025-09-10 02:32:54.831564", "step": 6850, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.860731", "step": 6850, "epoch": 3 }, { "type": "loss", "content": 0.00035554394708015025, "timestamp": "2025-09-10 02:32:54.862631", "step": 6851, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:54.891694", "step": 6851, "epoch": 3 }, { "type": "loss", "content": 9.832141950028017e-05, "timestamp": "2025-09-10 02:32:54.915052", "step": 6852, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:54.944091", "step": 6852, "epoch": 3 }, { "type": "loss", "content": 7.806030043866485e-05, "timestamp": "2025-09-10 02:32:54.945816", "step": 6853, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:54.974879", "step": 6853, "epoch": 3 }, { "type": "loss", "content": 0.035263847559690475, "timestamp": "2025-09-10 02:32:54.976914", "step": 6854, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.006367", "step": 6854, "epoch": 3 }, { "type": "loss", "content": 0.006878325249999762, "timestamp": "2025-09-10 02:32:55.008257", "step": 6855, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.037134", "step": 6855, "epoch": 3 }, { "type": "loss", "content": 0.0003121345944236964, "timestamp": "2025-09-10 02:32:55.060664", "step": 6856, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.090180", "step": 6856, "epoch": 3 }, { "type": "loss", "content": 0.0030188935343176126, "timestamp": "2025-09-10 02:32:55.092033", "step": 6857, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.121012", "step": 6857, "epoch": 3 }, { "type": "loss", "content": 0.011274388059973717, "timestamp": "2025-09-10 02:32:55.122826", "step": 6858, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.151502", "step": 6858, "epoch": 3 }, { "type": "loss", "content": 0.00012130722461733967, "timestamp": "2025-09-10 02:32:55.153221", "step": 6859, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.183550", "step": 6859, "epoch": 3 }, { "type": "loss", "content": 9.649340790929273e-05, "timestamp": "2025-09-10 02:32:55.207061", "step": 6860, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.235719", "step": 6860, "epoch": 3 }, { "type": "loss", "content": 0.00036928398185409606, "timestamp": "2025-09-10 02:32:55.237470", "step": 6861, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.270406", "step": 6861, "epoch": 3 }, { "type": "loss", "content": 0.0006278843502514064, "timestamp": "2025-09-10 02:32:55.272139", "step": 6862, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:55.304055", "step": 6862, "epoch": 3 }, { "type": "loss", "content": 0.0012702380772680044, "timestamp": "2025-09-10 02:32:55.314841", "step": 6863, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.357629", "step": 6863, "epoch": 3 }, { "type": "loss", "content": 0.00018065668700728565, "timestamp": "2025-09-10 02:32:55.380883", "step": 6864, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.413807", "step": 6864, "epoch": 3 }, { "type": "loss", "content": 7.278566772583872e-05, "timestamp": "2025-09-10 02:32:55.415423", "step": 6865, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.447210", "step": 6865, "epoch": 3 }, { "type": "loss", "content": 0.00028578523779287934, "timestamp": "2025-09-10 02:32:55.448828", "step": 6866, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.481568", "step": 6866, "epoch": 3 }, { "type": "loss", "content": 3.5654356906889006e-05, "timestamp": "2025-09-10 02:32:55.483478", "step": 6867, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.515472", "step": 6867, "epoch": 3 }, { "type": "loss", "content": 0.00021527970966417342, "timestamp": "2025-09-10 02:32:55.540965", "step": 6868, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.575896", "step": 6868, "epoch": 3 }, { "type": "loss", "content": 0.0009781593689695, "timestamp": "2025-09-10 02:32:55.577919", "step": 6869, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:55.613728", "step": 6869, "epoch": 3 }, { "type": "loss", "content": 7.875356823205948e-05, "timestamp": "2025-09-10 02:32:55.615435", "step": 6870, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.647253", "step": 6870, "epoch": 3 }, { "type": "loss", "content": 9.778184903552756e-05, "timestamp": "2025-09-10 02:32:55.649054", "step": 6871, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.692917", "step": 6871, "epoch": 3 }, { "type": "loss", "content": 0.0014271338004618883, "timestamp": "2025-09-10 02:32:55.716419", "step": 6872, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.752675", "step": 6872, "epoch": 3 }, { "type": "loss", "content": 9.694429900264367e-05, "timestamp": "2025-09-10 02:32:55.760824", "step": 6873, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:55.806388", "step": 6873, "epoch": 3 }, { "type": "loss", "content": 0.0006887580966576934, "timestamp": "2025-09-10 02:32:55.808118", "step": 6874, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.836724", "step": 6874, "epoch": 3 }, { "type": "loss", "content": 7.986043783603236e-05, "timestamp": "2025-09-10 02:32:55.838426", "step": 6875, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.868823", "step": 6875, "epoch": 3 }, { "type": "loss", "content": 0.0004202057025395334, "timestamp": "2025-09-10 02:32:55.892226", "step": 6876, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:55.922324", "step": 6876, "epoch": 3 }, { "type": "loss", "content": 0.006292398553341627, "timestamp": "2025-09-10 02:32:55.924247", "step": 6877, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:55.953390", "step": 6877, "epoch": 3 }, { "type": "loss", "content": 0.00038204342126846313, "timestamp": "2025-09-10 02:32:55.961561", "step": 6878, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:56.000325", "step": 6878, "epoch": 3 }, { "type": "loss", "content": 0.00032601208658888936, "timestamp": "2025-09-10 02:32:56.001989", "step": 6879, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.030910", "step": 6879, "epoch": 3 }, { "type": "loss", "content": 0.00011965764861088246, "timestamp": "2025-09-10 02:32:56.054270", "step": 6880, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.097814", "step": 6880, "epoch": 3 }, { "type": "loss", "content": 0.00011441412789281458, "timestamp": "2025-09-10 02:32:56.103750", "step": 6881, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:56.133193", "step": 6881, "epoch": 3 }, { "type": "loss", "content": 0.02021711878478527, "timestamp": "2025-09-10 02:32:56.134830", "step": 6882, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:56.168973", "step": 6882, "epoch": 3 }, { "type": "loss", "content": 0.0012548385420814157, "timestamp": "2025-09-10 02:32:56.170980", "step": 6883, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.199905", "step": 6883, "epoch": 3 }, { "type": "loss", "content": 0.0006658626953139901, "timestamp": "2025-09-10 02:32:56.223227", "step": 6884, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:56.252247", "step": 6884, "epoch": 3 }, { "type": "loss", "content": 0.03124235011637211, "timestamp": "2025-09-10 02:32:56.253886", "step": 6885, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:56.282630", "step": 6885, "epoch": 3 }, { "type": "loss", "content": 0.0007810547831468284, "timestamp": "2025-09-10 02:32:56.284078", "step": 6886, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.312694", "step": 6886, "epoch": 3 }, { "type": "loss", "content": 4.096954216947779e-05, "timestamp": "2025-09-10 02:32:56.314337", "step": 6887, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.342787", "step": 6887, "epoch": 3 }, { "type": "loss", "content": 9.618890180718154e-05, "timestamp": "2025-09-10 02:32:56.366212", "step": 6888, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.395329", "step": 6888, "epoch": 3 }, { "type": "loss", "content": 0.00043241720413789153, "timestamp": "2025-09-10 02:32:56.397012", "step": 6889, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.425907", "step": 6889, "epoch": 3 }, { "type": "loss", "content": 0.0001073941239155829, "timestamp": "2025-09-10 02:32:56.427970", "step": 6890, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.456643", "step": 6890, "epoch": 3 }, { "type": "loss", "content": 8.71215743245557e-05, "timestamp": "2025-09-10 02:32:56.458649", "step": 6891, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.487801", "step": 6891, "epoch": 3 }, { "type": "loss", "content": 0.0012700591469183564, "timestamp": "2025-09-10 02:32:56.511353", "step": 6892, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.540092", "step": 6892, "epoch": 3 }, { "type": "loss", "content": 0.09516288340091705, "timestamp": "2025-09-10 02:32:56.541939", "step": 6893, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.571189", "step": 6893, "epoch": 3 }, { "type": "loss", "content": 0.0011443132534623146, "timestamp": "2025-09-10 02:32:56.573171", "step": 6894, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.602204", "step": 6894, "epoch": 3 }, { "type": "loss", "content": 8.956807141657919e-05, "timestamp": "2025-09-10 02:32:56.604022", "step": 6895, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.633564", "step": 6895, "epoch": 3 }, { "type": "loss", "content": 0.001971913268789649, "timestamp": "2025-09-10 02:32:56.656979", "step": 6896, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.685950", "step": 6896, "epoch": 3 }, { "type": "loss", "content": 0.00021504145115613937, "timestamp": "2025-09-10 02:32:56.687876", "step": 6897, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.716539", "step": 6897, "epoch": 3 }, { "type": "loss", "content": 7.672259380342439e-05, "timestamp": "2025-09-10 02:32:56.718730", "step": 6898, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.747996", "step": 6898, "epoch": 3 }, { "type": "loss", "content": 0.00014145906607154757, "timestamp": "2025-09-10 02:32:56.749894", "step": 6899, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.781479", "step": 6899, "epoch": 3 }, { "type": "loss", "content": 8.570096542825922e-05, "timestamp": "2025-09-10 02:32:56.804855", "step": 6900, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.833623", "step": 6900, "epoch": 3 }, { "type": "loss", "content": 0.00012143644562456757, "timestamp": "2025-09-10 02:32:56.835406", "step": 6901, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.864335", "step": 6901, "epoch": 3 }, { "type": "loss", "content": 0.0001100988665712066, "timestamp": "2025-09-10 02:32:56.866155", "step": 6902, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.895306", "step": 6902, "epoch": 3 }, { "type": "loss", "content": 0.008772674016654491, "timestamp": "2025-09-10 02:32:56.897023", "step": 6903, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:56.926271", "step": 6903, "epoch": 3 }, { "type": "loss", "content": 0.00016054752632044256, "timestamp": "2025-09-10 02:32:56.949762", "step": 6904, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:56.979384", "step": 6904, "epoch": 3 }, { "type": "loss", "content": 7.028750405879691e-05, "timestamp": "2025-09-10 02:32:56.981063", "step": 6905, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.010381", "step": 6905, "epoch": 3 }, { "type": "loss", "content": 0.06985701620578766, "timestamp": "2025-09-10 02:32:57.012016", "step": 6906, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.040903", "step": 6906, "epoch": 3 }, { "type": "loss", "content": 0.0037340200506150723, "timestamp": "2025-09-10 02:32:57.042644", "step": 6907, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.071692", "step": 6907, "epoch": 3 }, { "type": "loss", "content": 0.00045398736256174743, "timestamp": "2025-09-10 02:32:57.095336", "step": 6908, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.125388", "step": 6908, "epoch": 3 }, { "type": "loss", "content": 0.000166809419170022, "timestamp": "2025-09-10 02:32:57.127196", "step": 6909, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:57.156540", "step": 6909, "epoch": 3 }, { "type": "loss", "content": 0.00012180476187495515, "timestamp": "2025-09-10 02:32:57.158484", "step": 6910, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.188258", "step": 6910, "epoch": 3 }, { "type": "loss", "content": 0.000284523208392784, "timestamp": "2025-09-10 02:32:57.189937", "step": 6911, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.218732", "step": 6911, "epoch": 3 }, { "type": "loss", "content": 0.00013130329898558557, "timestamp": "2025-09-10 02:32:57.242470", "step": 6912, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:57.275039", "step": 6912, "epoch": 3 }, { "type": "loss", "content": 6.123074126662686e-05, "timestamp": "2025-09-10 02:32:57.277314", "step": 6913, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.309551", "step": 6913, "epoch": 3 }, { "type": "loss", "content": 0.0004217438108753413, "timestamp": "2025-09-10 02:32:57.311221", "step": 6914, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.342395", "step": 6914, "epoch": 3 }, { "type": "loss", "content": 0.026478629559278488, "timestamp": "2025-09-10 02:32:57.344449", "step": 6915, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:57.380086", "step": 6915, "epoch": 3 }, { "type": "loss", "content": 0.00012950468226335943, "timestamp": "2025-09-10 02:32:57.403492", "step": 6916, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.437685", "step": 6916, "epoch": 3 }, { "type": "loss", "content": 0.0018698758212849498, "timestamp": "2025-09-10 02:32:57.439566", "step": 6917, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.472147", "step": 6917, "epoch": 3 }, { "type": "loss", "content": 0.0007063063676469028, "timestamp": "2025-09-10 02:32:57.473801", "step": 6918, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.514566", "step": 6918, "epoch": 3 }, { "type": "loss", "content": 0.0015736583154648542, "timestamp": "2025-09-10 02:32:57.516352", "step": 6919, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.547008", "step": 6919, "epoch": 3 }, { "type": "loss", "content": 0.00035860989009961486, "timestamp": "2025-09-10 02:32:57.570523", "step": 6920, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.605124", "step": 6920, "epoch": 3 }, { "type": "loss", "content": 0.00014832009037490934, "timestamp": "2025-09-10 02:32:57.609625", "step": 6921, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.645670", "step": 6921, "epoch": 3 }, { "type": "loss", "content": 0.0010603677947074175, "timestamp": "2025-09-10 02:32:57.647678", "step": 6922, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.685549", "step": 6922, "epoch": 3 }, { "type": "loss", "content": 0.0002539857814554125, "timestamp": "2025-09-10 02:32:57.687415", "step": 6923, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.729694", "step": 6923, "epoch": 3 }, { "type": "loss", "content": 0.00019526577671058476, "timestamp": "2025-09-10 02:32:57.753269", "step": 6924, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:57.788113", "step": 6924, "epoch": 3 }, { "type": "loss", "content": 0.003988584503531456, "timestamp": "2025-09-10 02:32:57.790469", "step": 6925, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:57.824316", "step": 6925, "epoch": 3 }, { "type": "loss", "content": 0.008360235020518303, "timestamp": "2025-09-10 02:32:57.826514", "step": 6926, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:57.855788", "step": 6926, "epoch": 3 }, { "type": "loss", "content": 0.00013281036808621138, "timestamp": "2025-09-10 02:32:57.857906", "step": 6927, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.887022", "step": 6927, "epoch": 3 }, { "type": "loss", "content": 7.877969619585201e-05, "timestamp": "2025-09-10 02:32:57.910446", "step": 6928, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:57.939321", "step": 6928, "epoch": 3 }, { "type": "loss", "content": 0.0002676034055184573, "timestamp": "2025-09-10 02:32:57.941482", "step": 6929, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:57.970366", "step": 6929, "epoch": 3 }, { "type": "loss", "content": 0.0010093670571222901, "timestamp": "2025-09-10 02:32:57.972341", "step": 6930, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:58.002321", "step": 6930, "epoch": 3 }, { "type": "loss", "content": 0.0003377363027539104, "timestamp": "2025-09-10 02:32:58.004457", "step": 6931, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.037541", "step": 6931, "epoch": 3 }, { "type": "loss", "content": 0.00020868994761258364, "timestamp": "2025-09-10 02:32:58.060973", "step": 6932, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:58.091768", "step": 6932, "epoch": 3 }, { "type": "loss", "content": 0.00015123530465643853, "timestamp": "2025-09-10 02:32:58.093725", "step": 6933, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.122595", "step": 6933, "epoch": 3 }, { "type": "loss", "content": 0.0010079393396154046, "timestamp": "2025-09-10 02:32:58.124504", "step": 6934, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:58.153629", "step": 6934, "epoch": 3 }, { "type": "loss", "content": 0.001646541990339756, "timestamp": "2025-09-10 02:32:58.155399", "step": 6935, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:58.184634", "step": 6935, "epoch": 3 }, { "type": "loss", "content": 0.014478239230811596, "timestamp": "2025-09-10 02:32:58.207931", "step": 6936, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.237471", "step": 6936, "epoch": 3 }, { "type": "loss", "content": 0.0014481694670394063, "timestamp": "2025-09-10 02:32:58.239187", "step": 6937, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.267818", "step": 6937, "epoch": 3 }, { "type": "loss", "content": 0.018990149721503258, "timestamp": "2025-09-10 02:32:58.269765", "step": 6938, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.298365", "step": 6938, "epoch": 3 }, { "type": "loss", "content": 0.0002534259401727468, "timestamp": "2025-09-10 02:32:58.300099", "step": 6939, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.328733", "step": 6939, "epoch": 3 }, { "type": "loss", "content": 0.0007595556089654565, "timestamp": "2025-09-10 02:32:58.352099", "step": 6940, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.385807", "step": 6940, "epoch": 3 }, { "type": "loss", "content": 0.0003793592914007604, "timestamp": "2025-09-10 02:32:58.387736", "step": 6941, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:32:58.416932", "step": 6941, "epoch": 3 }, { "type": "loss", "content": 0.00811021588742733, "timestamp": "2025-09-10 02:32:58.418960", "step": 6942, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.447342", "step": 6942, "epoch": 3 }, { "type": "loss", "content": 0.00023596796381752938, "timestamp": "2025-09-10 02:32:58.449167", "step": 6943, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.477692", "step": 6943, "epoch": 3 }, { "type": "loss", "content": 0.00923880934715271, "timestamp": "2025-09-10 02:32:58.501069", "step": 6944, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.529658", "step": 6944, "epoch": 3 }, { "type": "loss", "content": 0.0009434501407667994, "timestamp": "2025-09-10 02:32:58.531349", "step": 6945, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.560292", "step": 6945, "epoch": 3 }, { "type": "loss", "content": 0.0006749120657332242, "timestamp": "2025-09-10 02:32:58.562118", "step": 6946, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:58.591017", "step": 6946, "epoch": 3 }, { "type": "loss", "content": 0.00024472290533594787, "timestamp": "2025-09-10 02:32:58.592771", "step": 6947, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.621205", "step": 6947, "epoch": 3 }, { "type": "loss", "content": 0.007182929199188948, "timestamp": "2025-09-10 02:32:58.644496", "step": 6948, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.673876", "step": 6948, "epoch": 3 }, { "type": "loss", "content": 0.001059431699104607, "timestamp": "2025-09-10 02:32:58.675611", "step": 6949, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.704406", "step": 6949, "epoch": 3 }, { "type": "loss", "content": 0.0002584144822321832, "timestamp": "2025-09-10 02:32:58.706115", "step": 6950, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.734683", "step": 6950, "epoch": 3 }, { "type": "loss", "content": 0.00016741511353757232, "timestamp": "2025-09-10 02:32:58.736562", "step": 6951, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:58.766692", "step": 6951, "epoch": 3 }, { "type": "loss", "content": 0.01323452778160572, "timestamp": "2025-09-10 02:32:58.790091", "step": 6952, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.823155", "step": 6952, "epoch": 3 }, { "type": "loss", "content": 0.0021357552614063025, "timestamp": "2025-09-10 02:32:58.824819", "step": 6953, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.853576", "step": 6953, "epoch": 3 }, { "type": "loss", "content": 0.00036122865276411176, "timestamp": "2025-09-10 02:32:58.855378", "step": 6954, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.884348", "step": 6954, "epoch": 3 }, { "type": "loss", "content": 0.001152566634118557, "timestamp": "2025-09-10 02:32:58.886174", "step": 6955, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.915118", "step": 6955, "epoch": 3 }, { "type": "loss", "content": 0.0010581667302176356, "timestamp": "2025-09-10 02:32:58.938593", "step": 6956, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:58.968249", "step": 6956, "epoch": 3 }, { "type": "loss", "content": 0.0008137312834151089, "timestamp": "2025-09-10 02:32:58.970113", "step": 6957, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:58.999489", "step": 6957, "epoch": 3 }, { "type": "loss", "content": 0.00020094976935070008, "timestamp": "2025-09-10 02:32:59.001321", "step": 6958, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.030461", "step": 6958, "epoch": 3 }, { "type": "loss", "content": 0.00020380767819005996, "timestamp": "2025-09-10 02:32:59.032290", "step": 6959, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:59.060995", "step": 6959, "epoch": 3 }, { "type": "loss", "content": 0.0004710287321358919, "timestamp": "2025-09-10 02:32:59.084163", "step": 6960, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:32:59.112888", "step": 6960, "epoch": 3 }, { "type": "loss", "content": 0.0007825446664355695, "timestamp": "2025-09-10 02:32:59.114617", "step": 6961, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.143254", "step": 6961, "epoch": 3 }, { "type": "loss", "content": 0.002040523337200284, "timestamp": "2025-09-10 02:32:59.144956", "step": 6962, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.174342", "step": 6962, "epoch": 3 }, { "type": "loss", "content": 0.004353491123765707, "timestamp": "2025-09-10 02:32:59.176354", "step": 6963, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.204976", "step": 6963, "epoch": 3 }, { "type": "loss", "content": 0.0005209581577219069, "timestamp": "2025-09-10 02:32:59.228125", "step": 6964, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:59.259906", "step": 6964, "epoch": 3 }, { "type": "loss", "content": 0.0013556017074733973, "timestamp": "2025-09-10 02:32:59.261794", "step": 6965, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:59.291234", "step": 6965, "epoch": 3 }, { "type": "loss", "content": 0.00023635872639715672, "timestamp": "2025-09-10 02:32:59.292948", "step": 6966, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.321455", "step": 6966, "epoch": 3 }, { "type": "loss", "content": 0.0002624272892717272, "timestamp": "2025-09-10 02:32:59.323256", "step": 6967, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.354900", "step": 6967, "epoch": 3 }, { "type": "loss", "content": 0.0016566140111535788, "timestamp": "2025-09-10 02:32:59.378256", "step": 6968, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:59.411602", "step": 6968, "epoch": 3 }, { "type": "loss", "content": 0.00015238292689900845, "timestamp": "2025-09-10 02:32:59.413392", "step": 6969, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.445483", "step": 6969, "epoch": 3 }, { "type": "loss", "content": 0.028976481407880783, "timestamp": "2025-09-10 02:32:59.447268", "step": 6970, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.479694", "step": 6970, "epoch": 3 }, { "type": "loss", "content": 0.0008458493393845856, "timestamp": "2025-09-10 02:32:59.481457", "step": 6971, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.513976", "step": 6971, "epoch": 3 }, { "type": "loss", "content": 0.00027471824432723224, "timestamp": "2025-09-10 02:32:59.537198", "step": 6972, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.570258", "step": 6972, "epoch": 3 }, { "type": "loss", "content": 0.003696908475831151, "timestamp": "2025-09-10 02:32:59.572137", "step": 6973, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.604747", "step": 6973, "epoch": 3 }, { "type": "loss", "content": 0.00034436071291565895, "timestamp": "2025-09-10 02:32:59.606590", "step": 6974, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:59.636562", "step": 6974, "epoch": 3 }, { "type": "loss", "content": 0.025353388860821724, "timestamp": "2025-09-10 02:32:59.638402", "step": 6975, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:59.671670", "step": 6975, "epoch": 3 }, { "type": "loss", "content": 0.00040680429083295166, "timestamp": "2025-09-10 02:32:59.694915", "step": 6976, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.728038", "step": 6976, "epoch": 3 }, { "type": "loss", "content": 0.0006491728127002716, "timestamp": "2025-09-10 02:32:59.729800", "step": 6977, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.768954", "step": 6977, "epoch": 3 }, { "type": "loss", "content": 0.0016482959035784006, "timestamp": "2025-09-10 02:32:59.770922", "step": 6978, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.807359", "step": 6978, "epoch": 3 }, { "type": "loss", "content": 0.00020466528076212853, "timestamp": "2025-09-10 02:32:59.809113", "step": 6979, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:32:59.837863", "step": 6979, "epoch": 3 }, { "type": "loss", "content": 0.005594300571829081, "timestamp": "2025-09-10 02:32:59.860979", "step": 6980, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.890241", "step": 6980, "epoch": 3 }, { "type": "loss", "content": 0.0012212840374559164, "timestamp": "2025-09-10 02:32:59.892133", "step": 6981, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.921147", "step": 6981, "epoch": 3 }, { "type": "loss", "content": 0.0004984021070413291, "timestamp": "2025-09-10 02:32:59.922835", "step": 6982, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.951706", "step": 6982, "epoch": 3 }, { "type": "loss", "content": 0.0018222949001938105, "timestamp": "2025-09-10 02:32:59.953875", "step": 6983, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:32:59.982874", "step": 6983, "epoch": 3 }, { "type": "loss", "content": 0.000587698828894645, "timestamp": "2025-09-10 02:33:00.006423", "step": 6984, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:00.036102", "step": 6984, "epoch": 3 }, { "type": "loss", "content": 0.0006888590869493783, "timestamp": "2025-09-10 02:33:00.038102", "step": 6985, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:00.066948", "step": 6985, "epoch": 3 }, { "type": "loss", "content": 0.001037910464219749, "timestamp": "2025-09-10 02:33:00.068866", "step": 6986, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:00.097635", "step": 6986, "epoch": 3 }, { "type": "loss", "content": 0.0006376398378051817, "timestamp": "2025-09-10 02:33:00.099373", "step": 6987, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:00.128090", "step": 6987, "epoch": 3 }, { "type": "loss", "content": 0.0011590939247980714, "timestamp": "2025-09-10 02:33:00.151318", "step": 6988, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:00.180167", "step": 6988, "epoch": 3 }, { "type": "loss", "content": 0.00022759917192161083, "timestamp": "2025-09-10 02:33:00.181824", "step": 6989, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:00.210315", "step": 6989, "epoch": 3 }, { "type": "loss", "content": 0.0004471018328331411, "timestamp": "2025-09-10 02:33:00.212205", "step": 6990, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:00.241366", "step": 6990, "epoch": 3 }, { "type": "loss", "content": 0.0002062628191197291, "timestamp": "2025-09-10 02:33:00.243152", "step": 6991, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:00.271988", "step": 6991, "epoch": 3 }, { "type": "loss", "content": 0.007703538052737713, "timestamp": "2025-09-10 02:33:00.295388", "step": 6992, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:33:02.197257", "step": 6992, "epoch": 3 }, { "type": "pplx", "content": 2788629.99139465, "timestamp": "2025-09-10 02:33:02.198869", "step": 6992, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:02.226827", "step": 6992, "epoch": 3 }, { "type": "loss", "content": 0.0019857888109982014, "timestamp": "2025-09-10 02:33:02.228421", "step": 6993, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:02.257719", "step": 6993, "epoch": 3 }, { "type": "loss", "content": 0.00016344898904208094, "timestamp": "2025-09-10 02:33:02.259518", "step": 6994, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:02.288228", "step": 6994, "epoch": 3 }, { "type": "loss", "content": 0.0008041561814025044, "timestamp": "2025-09-10 02:33:02.289747", "step": 6995, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:02.318588", "step": 6995, "epoch": 3 }, { "type": "loss", "content": 0.005006737541407347, "timestamp": "2025-09-10 02:33:02.341876", "step": 6996, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:02.370884", "step": 6996, "epoch": 3 }, { "type": "loss", "content": 0.002978698117658496, "timestamp": "2025-09-10 02:33:02.372871", "step": 6997, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:02.402050", "step": 6997, "epoch": 3 }, { "type": "loss", "content": 0.0004123369581066072, "timestamp": "2025-09-10 02:33:02.403953", "step": 6998, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:02.433024", "step": 6998, "epoch": 3 }, { "type": "loss", "content": 0.0001701653382042423, "timestamp": "2025-09-10 02:33:02.434885", "step": 6999, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:02.463977", "step": 6999, "epoch": 3 }, { "type": "loss", "content": 0.0028805527836084366, "timestamp": "2025-09-10 02:33:02.487486", "step": 7000, "epoch": 3 }, { "type": "info", "content": "Checkpoint saved at step 7000", "timestamp": "2025-09-10 02:33:07.082404", "step": 7000, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:07.120951", "step": 7000, "epoch": 3 }, { "type": "loss", "content": 0.0010814116103574634, "timestamp": "2025-09-10 02:33:07.122798", "step": 7001, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.152114", "step": 7001, "epoch": 3 }, { "type": "loss", "content": 0.00018377190281171352, "timestamp": "2025-09-10 02:33:07.153897", "step": 7002, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.183405", "step": 7002, "epoch": 3 }, { "type": "loss", "content": 0.00031134625896811485, "timestamp": "2025-09-10 02:33:07.185103", "step": 7003, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:07.215771", "step": 7003, "epoch": 3 }, { "type": "loss", "content": 0.0001239635021192953, "timestamp": "2025-09-10 02:33:07.239382", "step": 7004, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.271809", "step": 7004, "epoch": 3 }, { "type": "loss", "content": 0.006299679167568684, "timestamp": "2025-09-10 02:33:07.273756", "step": 7005, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:07.306787", "step": 7005, "epoch": 3 }, { "type": "loss", "content": 0.0036267682444304228, "timestamp": "2025-09-10 02:33:07.309180", "step": 7006, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.340179", "step": 7006, "epoch": 3 }, { "type": "loss", "content": 0.0032613552175462246, "timestamp": "2025-09-10 02:33:07.341999", "step": 7007, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:07.371625", "step": 7007, "epoch": 3 }, { "type": "loss", "content": 0.0004091924347449094, "timestamp": "2025-09-10 02:33:07.395328", "step": 7008, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.429746", "step": 7008, "epoch": 3 }, { "type": "loss", "content": 0.00021148764062672853, "timestamp": "2025-09-10 02:33:07.431780", "step": 7009, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.462429", "step": 7009, "epoch": 3 }, { "type": "loss", "content": 0.00019017969316337258, "timestamp": "2025-09-10 02:33:07.464148", "step": 7010, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.501175", "step": 7010, "epoch": 3 }, { "type": "loss", "content": 9.069267252925783e-05, "timestamp": "2025-09-10 02:33:07.503016", "step": 7011, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.536144", "step": 7011, "epoch": 3 }, { "type": "loss", "content": 0.00021314578771125525, "timestamp": "2025-09-10 02:33:07.559260", "step": 7012, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.593723", "step": 7012, "epoch": 3 }, { "type": "loss", "content": 0.00013223057612776756, "timestamp": "2025-09-10 02:33:07.595786", "step": 7013, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.626792", "step": 7013, "epoch": 3 }, { "type": "loss", "content": 0.001440140069462359, "timestamp": "2025-09-10 02:33:07.628721", "step": 7014, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:07.662020", "step": 7014, "epoch": 3 }, { "type": "loss", "content": 0.00016096878971438855, "timestamp": "2025-09-10 02:33:07.663853", "step": 7015, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.700876", "step": 7015, "epoch": 3 }, { "type": "loss", "content": 0.003439195454120636, "timestamp": "2025-09-10 02:33:07.723998", "step": 7016, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.763270", "step": 7016, "epoch": 3 }, { "type": "loss", "content": 0.00029929561424069107, "timestamp": "2025-09-10 02:33:07.765048", "step": 7017, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.801808", "step": 7017, "epoch": 3 }, { "type": "loss", "content": 0.0007030866108834743, "timestamp": "2025-09-10 02:33:07.804001", "step": 7018, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.832842", "step": 7018, "epoch": 3 }, { "type": "loss", "content": 0.00105840596370399, "timestamp": "2025-09-10 02:33:07.834626", "step": 7019, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.863503", "step": 7019, "epoch": 3 }, { "type": "loss", "content": 0.00040280394023284316, "timestamp": "2025-09-10 02:33:07.886653", "step": 7020, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.916026", "step": 7020, "epoch": 3 }, { "type": "loss", "content": 0.005881170276552439, "timestamp": "2025-09-10 02:33:07.918026", "step": 7021, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:07.946997", "step": 7021, "epoch": 3 }, { "type": "loss", "content": 0.01138144638389349, "timestamp": "2025-09-10 02:33:07.948827", "step": 7022, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:07.977492", "step": 7022, "epoch": 3 }, { "type": "loss", "content": 0.0017692273249849677, "timestamp": "2025-09-10 02:33:07.979426", "step": 7023, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.009160", "step": 7023, "epoch": 3 }, { "type": "loss", "content": 0.00033304395037703216, "timestamp": "2025-09-10 02:33:08.032143", "step": 7024, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.063251", "step": 7024, "epoch": 3 }, { "type": "loss", "content": 0.0007116887718439102, "timestamp": "2025-09-10 02:33:08.065338", "step": 7025, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.094177", "step": 7025, "epoch": 3 }, { "type": "loss", "content": 0.00013543645036406815, "timestamp": "2025-09-10 02:33:08.095933", "step": 7026, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.124474", "step": 7026, "epoch": 3 }, { "type": "loss", "content": 0.00411624601110816, "timestamp": "2025-09-10 02:33:08.126108", "step": 7027, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.154812", "step": 7027, "epoch": 3 }, { "type": "loss", "content": 0.0014488935703411698, "timestamp": "2025-09-10 02:33:08.178088", "step": 7028, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.206856", "step": 7028, "epoch": 3 }, { "type": "loss", "content": 0.0002482616691850126, "timestamp": "2025-09-10 02:33:08.208596", "step": 7029, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.237202", "step": 7029, "epoch": 3 }, { "type": "loss", "content": 0.0005101663991808891, "timestamp": "2025-09-10 02:33:08.239059", "step": 7030, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.267644", "step": 7030, "epoch": 3 }, { "type": "loss", "content": 9.328716259915382e-05, "timestamp": "2025-09-10 02:33:08.269807", "step": 7031, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.298612", "step": 7031, "epoch": 3 }, { "type": "loss", "content": 0.00046753903734497726, "timestamp": "2025-09-10 02:33:08.321581", "step": 7032, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.350133", "step": 7032, "epoch": 3 }, { "type": "loss", "content": 0.0002330717834411189, "timestamp": "2025-09-10 02:33:08.351866", "step": 7033, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.380531", "step": 7033, "epoch": 3 }, { "type": "loss", "content": 0.0003099815803579986, "timestamp": "2025-09-10 02:33:08.382339", "step": 7034, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.411941", "step": 7034, "epoch": 3 }, { "type": "loss", "content": 0.0008793718297965825, "timestamp": "2025-09-10 02:33:08.413760", "step": 7035, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.442491", "step": 7035, "epoch": 3 }, { "type": "loss", "content": 9.09976297407411e-05, "timestamp": "2025-09-10 02:33:08.466333", "step": 7036, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.495269", "step": 7036, "epoch": 3 }, { "type": "loss", "content": 0.0005195220583118498, "timestamp": "2025-09-10 02:33:08.497078", "step": 7037, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.527802", "step": 7037, "epoch": 3 }, { "type": "loss", "content": 0.0562581941485405, "timestamp": "2025-09-10 02:33:08.529672", "step": 7038, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.558419", "step": 7038, "epoch": 3 }, { "type": "loss", "content": 0.0029173591174185276, "timestamp": "2025-09-10 02:33:08.560092", "step": 7039, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.588440", "step": 7039, "epoch": 3 }, { "type": "loss", "content": 0.00016022950876504183, "timestamp": "2025-09-10 02:33:08.611833", "step": 7040, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.640821", "step": 7040, "epoch": 3 }, { "type": "loss", "content": 0.0002844734990503639, "timestamp": "2025-09-10 02:33:08.642779", "step": 7041, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.671202", "step": 7041, "epoch": 3 }, { "type": "loss", "content": 0.005056020338088274, "timestamp": "2025-09-10 02:33:08.672861", "step": 7042, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.701540", "step": 7042, "epoch": 3 }, { "type": "loss", "content": 8.879298547981307e-05, "timestamp": "2025-09-10 02:33:08.703404", "step": 7043, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.732832", "step": 7043, "epoch": 3 }, { "type": "loss", "content": 0.00011597503180382773, "timestamp": "2025-09-10 02:33:08.756015", "step": 7044, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.787776", "step": 7044, "epoch": 3 }, { "type": "loss", "content": 0.0006039845175109804, "timestamp": "2025-09-10 02:33:08.789459", "step": 7045, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:08.822620", "step": 7045, "epoch": 3 }, { "type": "loss", "content": 0.0007330150110647082, "timestamp": "2025-09-10 02:33:08.824583", "step": 7046, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.854166", "step": 7046, "epoch": 3 }, { "type": "loss", "content": 0.0001059885835275054, "timestamp": "2025-09-10 02:33:08.855644", "step": 7047, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.884369", "step": 7047, "epoch": 3 }, { "type": "loss", "content": 0.006155488546937704, "timestamp": "2025-09-10 02:33:08.907802", "step": 7048, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:08.936886", "step": 7048, "epoch": 3 }, { "type": "loss", "content": 0.00010457994358148426, "timestamp": "2025-09-10 02:33:08.938901", "step": 7049, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.967971", "step": 7049, "epoch": 3 }, { "type": "loss", "content": 0.00033087312476709485, "timestamp": "2025-09-10 02:33:08.970100", "step": 7050, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:08.999016", "step": 7050, "epoch": 3 }, { "type": "loss", "content": 0.00039945446769706905, "timestamp": "2025-09-10 02:33:09.000899", "step": 7051, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.029868", "step": 7051, "epoch": 3 }, { "type": "loss", "content": 8.988494664663449e-05, "timestamp": "2025-09-10 02:33:09.053710", "step": 7052, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.082648", "step": 7052, "epoch": 3 }, { "type": "loss", "content": 0.00014014882617630064, "timestamp": "2025-09-10 02:33:09.084651", "step": 7053, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.113809", "step": 7053, "epoch": 3 }, { "type": "loss", "content": 0.04872704669833183, "timestamp": "2025-09-10 02:33:09.115560", "step": 7054, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:09.145171", "step": 7054, "epoch": 3 }, { "type": "loss", "content": 0.00013906206004321575, "timestamp": "2025-09-10 02:33:09.147007", "step": 7055, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.177313", "step": 7055, "epoch": 3 }, { "type": "loss", "content": 0.01986103318631649, "timestamp": "2025-09-10 02:33:09.200518", "step": 7056, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.229809", "step": 7056, "epoch": 3 }, { "type": "loss", "content": 8.047802111832425e-05, "timestamp": "2025-09-10 02:33:09.231626", "step": 7057, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.260482", "step": 7057, "epoch": 3 }, { "type": "loss", "content": 0.00015633231669198722, "timestamp": "2025-09-10 02:33:09.262442", "step": 7058, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:09.291547", "step": 7058, "epoch": 3 }, { "type": "loss", "content": 0.00010946964175673202, "timestamp": "2025-09-10 02:33:09.293625", "step": 7059, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.323136", "step": 7059, "epoch": 3 }, { "type": "loss", "content": 0.0009689336875453591, "timestamp": "2025-09-10 02:33:09.346840", "step": 7060, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.376370", "step": 7060, "epoch": 3 }, { "type": "loss", "content": 0.00028793231467716396, "timestamp": "2025-09-10 02:33:09.378375", "step": 7061, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.407627", "step": 7061, "epoch": 3 }, { "type": "loss", "content": 0.00020774765289388597, "timestamp": "2025-09-10 02:33:09.409616", "step": 7062, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.438833", "step": 7062, "epoch": 3 }, { "type": "loss", "content": 0.00014935732178855687, "timestamp": "2025-09-10 02:33:09.441043", "step": 7063, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.470048", "step": 7063, "epoch": 3 }, { "type": "loss", "content": 0.0001656820677453652, "timestamp": "2025-09-10 02:33:09.493672", "step": 7064, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.523290", "step": 7064, "epoch": 3 }, { "type": "loss", "content": 0.0003780599217861891, "timestamp": "2025-09-10 02:33:09.525548", "step": 7065, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.554806", "step": 7065, "epoch": 3 }, { "type": "loss", "content": 0.0020802337676286697, "timestamp": "2025-09-10 02:33:09.556876", "step": 7066, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.586119", "step": 7066, "epoch": 3 }, { "type": "loss", "content": 5.816563134430908e-05, "timestamp": "2025-09-10 02:33:09.588579", "step": 7067, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:09.617910", "step": 7067, "epoch": 3 }, { "type": "loss", "content": 0.0018779776291921735, "timestamp": "2025-09-10 02:33:09.641306", "step": 7068, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:09.674707", "step": 7068, "epoch": 3 }, { "type": "loss", "content": 0.0007602676632814109, "timestamp": "2025-09-10 02:33:09.676646", "step": 7069, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:09.705895", "step": 7069, "epoch": 3 }, { "type": "loss", "content": 0.0005338700721040368, "timestamp": "2025-09-10 02:33:09.708641", "step": 7070, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.738086", "step": 7070, "epoch": 3 }, { "type": "loss", "content": 0.00016905389202293009, "timestamp": "2025-09-10 02:33:09.740063", "step": 7071, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.768856", "step": 7071, "epoch": 3 }, { "type": "loss", "content": 0.00012695681652985513, "timestamp": "2025-09-10 02:33:09.792327", "step": 7072, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.821947", "step": 7072, "epoch": 3 }, { "type": "loss", "content": 0.0005912245833314955, "timestamp": "2025-09-10 02:33:09.823837", "step": 7073, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.852869", "step": 7073, "epoch": 3 }, { "type": "loss", "content": 0.00019086863903794438, "timestamp": "2025-09-10 02:33:09.855130", "step": 7074, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.884344", "step": 7074, "epoch": 3 }, { "type": "loss", "content": 0.00011014730262104422, "timestamp": "2025-09-10 02:33:09.886405", "step": 7075, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:09.915777", "step": 7075, "epoch": 3 }, { "type": "loss", "content": 0.00023562587739434093, "timestamp": "2025-09-10 02:33:09.939531", "step": 7076, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:09.968972", "step": 7076, "epoch": 3 }, { "type": "loss", "content": 0.0007138791843317449, "timestamp": "2025-09-10 02:33:09.971048", "step": 7077, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.000140", "step": 7077, "epoch": 3 }, { "type": "loss", "content": 0.009201214648783207, "timestamp": "2025-09-10 02:33:10.002037", "step": 7078, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.031061", "step": 7078, "epoch": 3 }, { "type": "loss", "content": 0.00014139436825644225, "timestamp": "2025-09-10 02:33:10.032945", "step": 7079, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:10.061988", "step": 7079, "epoch": 3 }, { "type": "loss", "content": 7.881080091465265e-05, "timestamp": "2025-09-10 02:33:10.085332", "step": 7080, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:10.114309", "step": 7080, "epoch": 3 }, { "type": "loss", "content": 0.0001724832400213927, "timestamp": "2025-09-10 02:33:10.116108", "step": 7081, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.145088", "step": 7081, "epoch": 3 }, { "type": "loss", "content": 5.758378028986044e-05, "timestamp": "2025-09-10 02:33:10.147419", "step": 7082, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.177042", "step": 7082, "epoch": 3 }, { "type": "loss", "content": 8.480289397994056e-05, "timestamp": "2025-09-10 02:33:10.179577", "step": 7083, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.212970", "step": 7083, "epoch": 3 }, { "type": "loss", "content": 0.0001304411853197962, "timestamp": "2025-09-10 02:33:10.236423", "step": 7084, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.275451", "step": 7084, "epoch": 3 }, { "type": "loss", "content": 0.00011326195817673579, "timestamp": "2025-09-10 02:33:10.278168", "step": 7085, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.318027", "step": 7085, "epoch": 3 }, { "type": "loss", "content": 0.00013952630979474634, "timestamp": "2025-09-10 02:33:10.320107", "step": 7086, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.360881", "step": 7086, "epoch": 3 }, { "type": "loss", "content": 0.00010285182361258194, "timestamp": "2025-09-10 02:33:10.363305", "step": 7087, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.404923", "step": 7087, "epoch": 3 }, { "type": "loss", "content": 0.0013592649484053254, "timestamp": "2025-09-10 02:33:10.428124", "step": 7088, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:10.460867", "step": 7088, "epoch": 3 }, { "type": "loss", "content": 0.001080493675544858, "timestamp": "2025-09-10 02:33:10.462765", "step": 7089, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.492077", "step": 7089, "epoch": 3 }, { "type": "loss", "content": 0.0003453026874922216, "timestamp": "2025-09-10 02:33:10.494152", "step": 7090, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:10.523051", "step": 7090, "epoch": 3 }, { "type": "loss", "content": 0.00012879457790404558, "timestamp": "2025-09-10 02:33:10.525023", "step": 7091, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:10.553881", "step": 7091, "epoch": 3 }, { "type": "loss", "content": 5.4374802857637405e-05, "timestamp": "2025-09-10 02:33:10.577498", "step": 7092, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.606928", "step": 7092, "epoch": 3 }, { "type": "loss", "content": 0.00010372676479164511, "timestamp": "2025-09-10 02:33:10.608820", "step": 7093, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.637850", "step": 7093, "epoch": 3 }, { "type": "loss", "content": 7.86098898970522e-05, "timestamp": "2025-09-10 02:33:10.639884", "step": 7094, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.668905", "step": 7094, "epoch": 3 }, { "type": "loss", "content": 0.0003556795709300786, "timestamp": "2025-09-10 02:33:10.671000", "step": 7095, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.700400", "step": 7095, "epoch": 3 }, { "type": "loss", "content": 8.15380408312194e-05, "timestamp": "2025-09-10 02:33:10.723675", "step": 7096, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.753461", "step": 7096, "epoch": 3 }, { "type": "loss", "content": 6.984026549616829e-05, "timestamp": "2025-09-10 02:33:10.755185", "step": 7097, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.786817", "step": 7097, "epoch": 3 }, { "type": "loss", "content": 0.00014169642236083746, "timestamp": "2025-09-10 02:33:10.788852", "step": 7098, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.822338", "step": 7098, "epoch": 3 }, { "type": "loss", "content": 8.376573532586917e-05, "timestamp": "2025-09-10 02:33:10.824023", "step": 7099, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.852999", "step": 7099, "epoch": 3 }, { "type": "loss", "content": 6.304789712885395e-05, "timestamp": "2025-09-10 02:33:10.876561", "step": 7100, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.905811", "step": 7100, "epoch": 3 }, { "type": "loss", "content": 6.13820884609595e-05, "timestamp": "2025-09-10 02:33:10.907445", "step": 7101, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.936023", "step": 7101, "epoch": 3 }, { "type": "loss", "content": 0.00020228189532645047, "timestamp": "2025-09-10 02:33:10.937722", "step": 7102, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.966383", "step": 7102, "epoch": 3 }, { "type": "loss", "content": 9.113523265114054e-05, "timestamp": "2025-09-10 02:33:10.968095", "step": 7103, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:10.997019", "step": 7103, "epoch": 3 }, { "type": "loss", "content": 0.0001561507669975981, "timestamp": "2025-09-10 02:33:11.020212", "step": 7104, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.048872", "step": 7104, "epoch": 3 }, { "type": "loss", "content": 0.02126910910010338, "timestamp": "2025-09-10 02:33:11.050647", "step": 7105, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.079591", "step": 7105, "epoch": 3 }, { "type": "loss", "content": 8.194655674742535e-05, "timestamp": "2025-09-10 02:33:11.081327", "step": 7106, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.110037", "step": 7106, "epoch": 3 }, { "type": "loss", "content": 0.00017607762129046023, "timestamp": "2025-09-10 02:33:11.112060", "step": 7107, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.140932", "step": 7107, "epoch": 3 }, { "type": "loss", "content": 0.0005335750174708664, "timestamp": "2025-09-10 02:33:11.164766", "step": 7108, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.194000", "step": 7108, "epoch": 3 }, { "type": "loss", "content": 0.0002091860951622948, "timestamp": "2025-09-10 02:33:11.195819", "step": 7109, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.223958", "step": 7109, "epoch": 3 }, { "type": "loss", "content": 6.13267402513884e-05, "timestamp": "2025-09-10 02:33:11.225777", "step": 7110, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.254286", "step": 7110, "epoch": 3 }, { "type": "loss", "content": 9.796098311198875e-05, "timestamp": "2025-09-10 02:33:11.256209", "step": 7111, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.285231", "step": 7111, "epoch": 3 }, { "type": "loss", "content": 8.225706551456824e-05, "timestamp": "2025-09-10 02:33:11.308549", "step": 7112, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.337941", "step": 7112, "epoch": 3 }, { "type": "loss", "content": 0.0002503559517208487, "timestamp": "2025-09-10 02:33:11.339600", "step": 7113, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.368638", "step": 7113, "epoch": 3 }, { "type": "loss", "content": 0.00012760628305841237, "timestamp": "2025-09-10 02:33:11.370376", "step": 7114, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.399365", "step": 7114, "epoch": 3 }, { "type": "loss", "content": 0.030732514336705208, "timestamp": "2025-09-10 02:33:11.401676", "step": 7115, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.430164", "step": 7115, "epoch": 3 }, { "type": "loss", "content": 0.0001161035688710399, "timestamp": "2025-09-10 02:33:11.453389", "step": 7116, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.482628", "step": 7116, "epoch": 3 }, { "type": "loss", "content": 0.00017230946104973555, "timestamp": "2025-09-10 02:33:11.484106", "step": 7117, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.512805", "step": 7117, "epoch": 3 }, { "type": "loss", "content": 0.0063788327388465405, "timestamp": "2025-09-10 02:33:11.514453", "step": 7118, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.543460", "step": 7118, "epoch": 3 }, { "type": "loss", "content": 0.008579443208873272, "timestamp": "2025-09-10 02:33:11.545130", "step": 7119, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.573738", "step": 7119, "epoch": 3 }, { "type": "loss", "content": 9.17142751859501e-05, "timestamp": "2025-09-10 02:33:11.597047", "step": 7120, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.626131", "step": 7120, "epoch": 3 }, { "type": "loss", "content": 0.0007114761392585933, "timestamp": "2025-09-10 02:33:11.628906", "step": 7121, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:11.658035", "step": 7121, "epoch": 3 }, { "type": "loss", "content": 5.198407598072663e-05, "timestamp": "2025-09-10 02:33:11.659818", "step": 7122, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.688805", "step": 7122, "epoch": 3 }, { "type": "loss", "content": 0.00018394214566797018, "timestamp": "2025-09-10 02:33:11.691009", "step": 7123, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:11.720150", "step": 7123, "epoch": 3 }, { "type": "loss", "content": 0.00014498808013740927, "timestamp": "2025-09-10 02:33:11.743525", "step": 7124, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.773032", "step": 7124, "epoch": 3 }, { "type": "loss", "content": 0.002670370042324066, "timestamp": "2025-09-10 02:33:11.774689", "step": 7125, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.803096", "step": 7125, "epoch": 3 }, { "type": "loss", "content": 0.00017485507123637944, "timestamp": "2025-09-10 02:33:11.804724", "step": 7126, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.833289", "step": 7126, "epoch": 3 }, { "type": "loss", "content": 0.01103391032665968, "timestamp": "2025-09-10 02:33:11.834951", "step": 7127, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:11.863638", "step": 7127, "epoch": 3 }, { "type": "loss", "content": 0.0075844330713152885, "timestamp": "2025-09-10 02:33:11.886946", "step": 7128, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:11.915880", "step": 7128, "epoch": 3 }, { "type": "loss", "content": 4.473465378396213e-05, "timestamp": "2025-09-10 02:33:11.917567", "step": 7129, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:11.945908", "step": 7129, "epoch": 3 }, { "type": "loss", "content": 0.018360259011387825, "timestamp": "2025-09-10 02:33:11.947553", "step": 7130, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:11.976286", "step": 7130, "epoch": 3 }, { "type": "loss", "content": 0.00010080776701215655, "timestamp": "2025-09-10 02:33:11.978285", "step": 7131, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:12.007334", "step": 7131, "epoch": 3 }, { "type": "loss", "content": 0.00028588445275090635, "timestamp": "2025-09-10 02:33:12.031048", "step": 7132, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.060107", "step": 7132, "epoch": 3 }, { "type": "loss", "content": 0.00037449359660968184, "timestamp": "2025-09-10 02:33:12.062265", "step": 7133, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.090891", "step": 7133, "epoch": 3 }, { "type": "loss", "content": 6.24270542175509e-05, "timestamp": "2025-09-10 02:33:12.093127", "step": 7134, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.123322", "step": 7134, "epoch": 3 }, { "type": "loss", "content": 0.0013547196285799146, "timestamp": "2025-09-10 02:33:12.125008", "step": 7135, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:12.153806", "step": 7135, "epoch": 3 }, { "type": "loss", "content": 0.0015526112401857972, "timestamp": "2025-09-10 02:33:12.178427", "step": 7136, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:12.213235", "step": 7136, "epoch": 3 }, { "type": "loss", "content": 7.431041012750939e-05, "timestamp": "2025-09-10 02:33:12.215082", "step": 7137, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.246318", "step": 7137, "epoch": 3 }, { "type": "loss", "content": 0.00011348089174134657, "timestamp": "2025-09-10 02:33:12.248304", "step": 7138, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:12.287892", "step": 7138, "epoch": 3 }, { "type": "loss", "content": 0.0002764788514468819, "timestamp": "2025-09-10 02:33:12.289518", "step": 7139, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.330768", "step": 7139, "epoch": 3 }, { "type": "loss", "content": 0.00024690685677342117, "timestamp": "2025-09-10 02:33:12.353954", "step": 7140, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.393309", "step": 7140, "epoch": 3 }, { "type": "loss", "content": 0.0007415753207169473, "timestamp": "2025-09-10 02:33:12.395041", "step": 7141, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.436214", "step": 7141, "epoch": 3 }, { "type": "loss", "content": 0.026568885892629623, "timestamp": "2025-09-10 02:33:12.437929", "step": 7142, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.470612", "step": 7142, "epoch": 3 }, { "type": "loss", "content": 0.00014456224744208157, "timestamp": "2025-09-10 02:33:12.472229", "step": 7143, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:12.501086", "step": 7143, "epoch": 3 }, { "type": "loss", "content": 0.00011599133722484112, "timestamp": "2025-09-10 02:33:12.524580", "step": 7144, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:33:14.415894", "step": 7144, "epoch": 3 }, { "type": "pplx", "content": 2708584.2478265725, "timestamp": "2025-09-10 02:33:14.417794", "step": 7144, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.457237", "step": 7144, "epoch": 3 }, { "type": "loss", "content": 0.00030244843219406903, "timestamp": "2025-09-10 02:33:14.459456", "step": 7145, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:14.495339", "step": 7145, "epoch": 3 }, { "type": "loss", "content": 0.03959156200289726, "timestamp": "2025-09-10 02:33:14.497484", "step": 7146, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:14.526610", "step": 7146, "epoch": 3 }, { "type": "loss", "content": 0.020285243168473244, "timestamp": "2025-09-10 02:33:14.528282", "step": 7147, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.557424", "step": 7147, "epoch": 3 }, { "type": "loss", "content": 0.00016595340275671333, "timestamp": "2025-09-10 02:33:14.580963", "step": 7148, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.609648", "step": 7148, "epoch": 3 }, { "type": "loss", "content": 0.00016575872723478824, "timestamp": "2025-09-10 02:33:14.611955", "step": 7149, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.640692", "step": 7149, "epoch": 3 }, { "type": "loss", "content": 0.0009058531140908599, "timestamp": "2025-09-10 02:33:14.642661", "step": 7150, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.671529", "step": 7150, "epoch": 3 }, { "type": "loss", "content": 9.503169712843373e-05, "timestamp": "2025-09-10 02:33:14.673479", "step": 7151, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.702824", "step": 7151, "epoch": 3 }, { "type": "loss", "content": 0.00025164708495140076, "timestamp": "2025-09-10 02:33:14.726531", "step": 7152, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.755504", "step": 7152, "epoch": 3 }, { "type": "loss", "content": 0.00029217940755188465, "timestamp": "2025-09-10 02:33:14.757421", "step": 7153, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.787597", "step": 7153, "epoch": 3 }, { "type": "loss", "content": 0.0020050134044140577, "timestamp": "2025-09-10 02:33:14.789557", "step": 7154, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.826020", "step": 7154, "epoch": 3 }, { "type": "loss", "content": 0.00011182740854565054, "timestamp": "2025-09-10 02:33:14.827768", "step": 7155, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.856359", "step": 7155, "epoch": 3 }, { "type": "loss", "content": 0.00018755366909317672, "timestamp": "2025-09-10 02:33:14.881846", "step": 7156, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.910895", "step": 7156, "epoch": 3 }, { "type": "loss", "content": 8.767979306867346e-05, "timestamp": "2025-09-10 02:33:14.912692", "step": 7157, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.941693", "step": 7157, "epoch": 3 }, { "type": "loss", "content": 0.00010503112571313977, "timestamp": "2025-09-10 02:33:14.943494", "step": 7158, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:14.972214", "step": 7158, "epoch": 3 }, { "type": "loss", "content": 0.00032662873854860663, "timestamp": "2025-09-10 02:33:14.973935", "step": 7159, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.002913", "step": 7159, "epoch": 3 }, { "type": "loss", "content": 0.00044180676923133433, "timestamp": "2025-09-10 02:33:15.026444", "step": 7160, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:15.055900", "step": 7160, "epoch": 3 }, { "type": "loss", "content": 0.00045780817163176835, "timestamp": "2025-09-10 02:33:15.057788", "step": 7161, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:15.087130", "step": 7161, "epoch": 3 }, { "type": "loss", "content": 0.05401367321610451, "timestamp": "2025-09-10 02:33:15.089430", "step": 7162, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.118307", "step": 7162, "epoch": 3 }, { "type": "loss", "content": 0.0010221587726846337, "timestamp": "2025-09-10 02:33:15.120440", "step": 7163, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:33:15.150325", "step": 7163, "epoch": 3 }, { "type": "loss", "content": 0.00013791497622150928, "timestamp": "2025-09-10 02:33:15.173758", "step": 7164, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.203079", "step": 7164, "epoch": 3 }, { "type": "loss", "content": 9.647140541346744e-05, "timestamp": "2025-09-10 02:33:15.205021", "step": 7165, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.235398", "step": 7165, "epoch": 3 }, { "type": "loss", "content": 0.007709108758717775, "timestamp": "2025-09-10 02:33:15.237422", "step": 7166, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.267408", "step": 7166, "epoch": 3 }, { "type": "loss", "content": 0.024306802079081535, "timestamp": "2025-09-10 02:33:15.269540", "step": 7167, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.299283", "step": 7167, "epoch": 3 }, { "type": "loss", "content": 0.00029987801099196076, "timestamp": "2025-09-10 02:33:15.322949", "step": 7168, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.351698", "step": 7168, "epoch": 3 }, { "type": "loss", "content": 0.00010465878585819155, "timestamp": "2025-09-10 02:33:15.353859", "step": 7169, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.382813", "step": 7169, "epoch": 3 }, { "type": "loss", "content": 0.0001675562234595418, "timestamp": "2025-09-10 02:33:15.384924", "step": 7170, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:15.413753", "step": 7170, "epoch": 3 }, { "type": "loss", "content": 0.00011116742098238319, "timestamp": "2025-09-10 02:33:15.415792", "step": 7171, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.445616", "step": 7171, "epoch": 3 }, { "type": "loss", "content": 7.266044121934101e-05, "timestamp": "2025-09-10 02:33:15.469016", "step": 7172, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.498326", "step": 7172, "epoch": 3 }, { "type": "loss", "content": 0.0003432599769439548, "timestamp": "2025-09-10 02:33:15.499996", "step": 7173, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.528880", "step": 7173, "epoch": 3 }, { "type": "loss", "content": 0.0017077813390642405, "timestamp": "2025-09-10 02:33:15.530784", "step": 7174, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.559697", "step": 7174, "epoch": 3 }, { "type": "loss", "content": 0.011404238641262054, "timestamp": "2025-09-10 02:33:15.561644", "step": 7175, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.590602", "step": 7175, "epoch": 3 }, { "type": "loss", "content": 0.0005473392084240913, "timestamp": "2025-09-10 02:33:15.613750", "step": 7176, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.642703", "step": 7176, "epoch": 3 }, { "type": "loss", "content": 0.016036823391914368, "timestamp": "2025-09-10 02:33:15.644821", "step": 7177, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.673697", "step": 7177, "epoch": 3 }, { "type": "loss", "content": 0.00032304725027643144, "timestamp": "2025-09-10 02:33:15.675865", "step": 7178, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.704670", "step": 7178, "epoch": 3 }, { "type": "loss", "content": 0.00021238785120658576, "timestamp": "2025-09-10 02:33:15.707587", "step": 7179, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.736953", "step": 7179, "epoch": 3 }, { "type": "loss", "content": 0.00025740201817825437, "timestamp": "2025-09-10 02:33:15.760971", "step": 7180, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.790311", "step": 7180, "epoch": 3 }, { "type": "loss", "content": 0.0022035904694348574, "timestamp": "2025-09-10 02:33:15.792545", "step": 7181, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.822862", "step": 7181, "epoch": 3 }, { "type": "loss", "content": 0.01289613451808691, "timestamp": "2025-09-10 02:33:15.825429", "step": 7182, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.854335", "step": 7182, "epoch": 3 }, { "type": "loss", "content": 0.00020831004076171666, "timestamp": "2025-09-10 02:33:15.856443", "step": 7183, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.886240", "step": 7183, "epoch": 3 }, { "type": "loss", "content": 0.0003103218332398683, "timestamp": "2025-09-10 02:33:15.909831", "step": 7184, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:15.939262", "step": 7184, "epoch": 3 }, { "type": "loss", "content": 0.008841032162308693, "timestamp": "2025-09-10 02:33:15.941421", "step": 7185, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:15.970682", "step": 7185, "epoch": 3 }, { "type": "loss", "content": 0.000483514042571187, "timestamp": "2025-09-10 02:33:15.972605", "step": 7186, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.001450", "step": 7186, "epoch": 3 }, { "type": "loss", "content": 5.5209828133229166e-05, "timestamp": "2025-09-10 02:33:16.003340", "step": 7187, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:16.034228", "step": 7187, "epoch": 3 }, { "type": "loss", "content": 0.00037289224565029144, "timestamp": "2025-09-10 02:33:16.057735", "step": 7188, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.087185", "step": 7188, "epoch": 3 }, { "type": "loss", "content": 0.0008058823295868933, "timestamp": "2025-09-10 02:33:16.089214", "step": 7189, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.118354", "step": 7189, "epoch": 3 }, { "type": "loss", "content": 5.910907202633098e-05, "timestamp": "2025-09-10 02:33:16.120523", "step": 7190, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:16.149531", "step": 7190, "epoch": 3 }, { "type": "loss", "content": 0.000479226466268301, "timestamp": "2025-09-10 02:33:16.151451", "step": 7191, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.180606", "step": 7191, "epoch": 3 }, { "type": "loss", "content": 0.00018494235700927675, "timestamp": "2025-09-10 02:33:16.203847", "step": 7192, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.238089", "step": 7192, "epoch": 3 }, { "type": "loss", "content": 0.00022544446983374655, "timestamp": "2025-09-10 02:33:16.239676", "step": 7193, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.279322", "step": 7193, "epoch": 3 }, { "type": "loss", "content": 0.0011998852714896202, "timestamp": "2025-09-10 02:33:16.281055", "step": 7194, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.322118", "step": 7194, "epoch": 3 }, { "type": "loss", "content": 0.0002667410299181938, "timestamp": "2025-09-10 02:33:16.323774", "step": 7195, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.360392", "step": 7195, "epoch": 3 }, { "type": "loss", "content": 0.00021100246522109956, "timestamp": "2025-09-10 02:33:16.383923", "step": 7196, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.425492", "step": 7196, "epoch": 3 }, { "type": "loss", "content": 5.4788488341728225e-05, "timestamp": "2025-09-10 02:33:16.427338", "step": 7197, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.466318", "step": 7197, "epoch": 3 }, { "type": "loss", "content": 0.008930227719247341, "timestamp": "2025-09-10 02:33:16.468183", "step": 7198, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.505984", "step": 7198, "epoch": 3 }, { "type": "loss", "content": 0.000940874801017344, "timestamp": "2025-09-10 02:33:16.507819", "step": 7199, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.541318", "step": 7199, "epoch": 3 }, { "type": "loss", "content": 0.0004326365306042135, "timestamp": "2025-09-10 02:33:16.564740", "step": 7200, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.593817", "step": 7200, "epoch": 3 }, { "type": "loss", "content": 0.0010036567691713572, "timestamp": "2025-09-10 02:33:16.599972", "step": 7201, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.629347", "step": 7201, "epoch": 3 }, { "type": "loss", "content": 0.0004632103373296559, "timestamp": "2025-09-10 02:33:16.636037", "step": 7202, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.668183", "step": 7202, "epoch": 3 }, { "type": "loss", "content": 0.000718653725925833, "timestamp": "2025-09-10 02:33:16.676007", "step": 7203, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:16.710080", "step": 7203, "epoch": 3 }, { "type": "loss", "content": 0.0009695276385173202, "timestamp": "2025-09-10 02:33:16.733599", "step": 7204, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.774220", "step": 7204, "epoch": 3 }, { "type": "loss", "content": 9.65236104093492e-05, "timestamp": "2025-09-10 02:33:16.779725", "step": 7205, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.812169", "step": 7205, "epoch": 3 }, { "type": "loss", "content": 0.00014549396291840822, "timestamp": "2025-09-10 02:33:16.813981", "step": 7206, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:16.849532", "step": 7206, "epoch": 3 }, { "type": "loss", "content": 0.0021547910291701555, "timestamp": "2025-09-10 02:33:16.851563", "step": 7207, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.880700", "step": 7207, "epoch": 3 }, { "type": "loss", "content": 0.000640452781226486, "timestamp": "2025-09-10 02:33:16.904085", "step": 7208, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.936703", "step": 7208, "epoch": 3 }, { "type": "loss", "content": 0.00038810717524029315, "timestamp": "2025-09-10 02:33:16.939812", "step": 7209, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.968807", "step": 7209, "epoch": 3 }, { "type": "loss", "content": 0.0013020496116951108, "timestamp": "2025-09-10 02:33:16.970418", "step": 7210, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:16.999788", "step": 7210, "epoch": 3 }, { "type": "loss", "content": 0.00020852847956120968, "timestamp": "2025-09-10 02:33:17.001413", "step": 7211, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.030282", "step": 7211, "epoch": 3 }, { "type": "loss", "content": 0.0005294812144711614, "timestamp": "2025-09-10 02:33:17.053668", "step": 7212, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.082684", "step": 7212, "epoch": 3 }, { "type": "loss", "content": 0.00015804909344296902, "timestamp": "2025-09-10 02:33:17.084679", "step": 7213, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.113574", "step": 7213, "epoch": 3 }, { "type": "loss", "content": 0.03607497364282608, "timestamp": "2025-09-10 02:33:17.115833", "step": 7214, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.148880", "step": 7214, "epoch": 3 }, { "type": "loss", "content": 0.0005641308380290866, "timestamp": "2025-09-10 02:33:17.158129", "step": 7215, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:17.190234", "step": 7215, "epoch": 3 }, { "type": "loss", "content": 0.0009306627907790244, "timestamp": "2025-09-10 02:33:17.213721", "step": 7216, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.245350", "step": 7216, "epoch": 3 }, { "type": "loss", "content": 0.000797395478002727, "timestamp": "2025-09-10 02:33:17.247450", "step": 7217, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:17.276695", "step": 7217, "epoch": 3 }, { "type": "loss", "content": 0.01312278863042593, "timestamp": "2025-09-10 02:33:17.280277", "step": 7218, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:17.311221", "step": 7218, "epoch": 3 }, { "type": "loss", "content": 0.00019354607502464205, "timestamp": "2025-09-10 02:33:17.313265", "step": 7219, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.342798", "step": 7219, "epoch": 3 }, { "type": "loss", "content": 0.00018217017350252718, "timestamp": "2025-09-10 02:33:17.366426", "step": 7220, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:17.396146", "step": 7220, "epoch": 3 }, { "type": "loss", "content": 0.00048028648598119617, "timestamp": "2025-09-10 02:33:17.397984", "step": 7221, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.426880", "step": 7221, "epoch": 3 }, { "type": "loss", "content": 0.0011663679033517838, "timestamp": "2025-09-10 02:33:17.428664", "step": 7222, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.457813", "step": 7222, "epoch": 3 }, { "type": "loss", "content": 0.0005598780116997659, "timestamp": "2025-09-10 02:33:17.459676", "step": 7223, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.488698", "step": 7223, "epoch": 3 }, { "type": "loss", "content": 0.0002811321464832872, "timestamp": "2025-09-10 02:33:17.512109", "step": 7224, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.541059", "step": 7224, "epoch": 3 }, { "type": "loss", "content": 0.00015632924623787403, "timestamp": "2025-09-10 02:33:17.542947", "step": 7225, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 128 ], "flops": 3797092544000 }, "timestamp": "2025-09-10 02:33:17.572000", "step": 7225, "epoch": 3 }, { "type": "loss", "content": 0.01196103822439909, "timestamp": "2025-09-10 02:33:17.574200", "step": 7226, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.603650", "step": 7226, "epoch": 3 }, { "type": "loss", "content": 5.215074270381592e-05, "timestamp": "2025-09-10 02:33:17.605374", "step": 7227, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.634926", "step": 7227, "epoch": 3 }, { "type": "loss", "content": 0.0003372817882336676, "timestamp": "2025-09-10 02:33:17.658472", "step": 7228, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:17.688762", "step": 7228, "epoch": 3 }, { "type": "loss", "content": 0.00027380246319808066, "timestamp": "2025-09-10 02:33:17.690589", "step": 7229, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:17.720040", "step": 7229, "epoch": 3 }, { "type": "loss", "content": 0.001981860725209117, "timestamp": "2025-09-10 02:33:17.721710", "step": 7230, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.750254", "step": 7230, "epoch": 3 }, { "type": "loss", "content": 0.004536677151918411, "timestamp": "2025-09-10 02:33:17.751864", "step": 7231, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.780717", "step": 7231, "epoch": 3 }, { "type": "loss", "content": 0.0028503385838121176, "timestamp": "2025-09-10 02:33:17.804370", "step": 7232, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.833782", "step": 7232, "epoch": 3 }, { "type": "loss", "content": 0.00026749540120363235, "timestamp": "2025-09-10 02:33:17.835843", "step": 7233, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.864780", "step": 7233, "epoch": 3 }, { "type": "loss", "content": 0.0003853130911011249, "timestamp": "2025-09-10 02:33:17.866775", "step": 7234, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.895639", "step": 7234, "epoch": 3 }, { "type": "loss", "content": 0.0024504235479980707, "timestamp": "2025-09-10 02:33:17.897470", "step": 7235, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.926693", "step": 7235, "epoch": 3 }, { "type": "loss", "content": 0.000501504517160356, "timestamp": "2025-09-10 02:33:17.950055", "step": 7236, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:17.978974", "step": 7236, "epoch": 3 }, { "type": "loss", "content": 0.002365201013162732, "timestamp": "2025-09-10 02:33:17.980834", "step": 7237, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.009879", "step": 7237, "epoch": 3 }, { "type": "loss", "content": 0.00019209722813684493, "timestamp": "2025-09-10 02:33:18.012022", "step": 7238, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.040879", "step": 7238, "epoch": 3 }, { "type": "loss", "content": 0.0002008116280194372, "timestamp": "2025-09-10 02:33:18.042859", "step": 7239, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.071853", "step": 7239, "epoch": 3 }, { "type": "loss", "content": 0.0011329955887049437, "timestamp": "2025-09-10 02:33:18.095341", "step": 7240, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.124024", "step": 7240, "epoch": 3 }, { "type": "loss", "content": 0.001901619485579431, "timestamp": "2025-09-10 02:33:18.125882", "step": 7241, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.154340", "step": 7241, "epoch": 3 }, { "type": "loss", "content": 0.00016872587730176747, "timestamp": "2025-09-10 02:33:18.156169", "step": 7242, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.184947", "step": 7242, "epoch": 3 }, { "type": "loss", "content": 0.00016339278954546899, "timestamp": "2025-09-10 02:33:18.186960", "step": 7243, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.223386", "step": 7243, "epoch": 3 }, { "type": "loss", "content": 0.0001435885496903211, "timestamp": "2025-09-10 02:33:18.246774", "step": 7244, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.284002", "step": 7244, "epoch": 3 }, { "type": "loss", "content": 0.0005802605883218348, "timestamp": "2025-09-10 02:33:18.285891", "step": 7245, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:18.325737", "step": 7245, "epoch": 3 }, { "type": "loss", "content": 0.0002150183281628415, "timestamp": "2025-09-10 02:33:18.327545", "step": 7246, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.363723", "step": 7246, "epoch": 3 }, { "type": "loss", "content": 0.0005969787598587573, "timestamp": "2025-09-10 02:33:18.365507", "step": 7247, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.406517", "step": 7247, "epoch": 3 }, { "type": "loss", "content": 0.00018962433387059718, "timestamp": "2025-09-10 02:33:18.429917", "step": 7248, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:18.467873", "step": 7248, "epoch": 3 }, { "type": "loss", "content": 0.0004311216180212796, "timestamp": "2025-09-10 02:33:18.469742", "step": 7249, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.501204", "step": 7249, "epoch": 3 }, { "type": "loss", "content": 0.00030078229610808194, "timestamp": "2025-09-10 02:33:18.503033", "step": 7250, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.531969", "step": 7250, "epoch": 3 }, { "type": "loss", "content": 0.001691844081506133, "timestamp": "2025-09-10 02:33:18.533836", "step": 7251, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.562872", "step": 7251, "epoch": 3 }, { "type": "loss", "content": 0.00035047222627326846, "timestamp": "2025-09-10 02:33:18.586471", "step": 7252, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.616331", "step": 7252, "epoch": 3 }, { "type": "loss", "content": 0.0048087830655276775, "timestamp": "2025-09-10 02:33:18.618190", "step": 7253, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.647803", "step": 7253, "epoch": 3 }, { "type": "loss", "content": 0.0001972487661987543, "timestamp": "2025-09-10 02:33:18.649643", "step": 7254, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:18.679033", "step": 7254, "epoch": 3 }, { "type": "loss", "content": 0.016567738726735115, "timestamp": "2025-09-10 02:33:18.680853", "step": 7255, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.709967", "step": 7255, "epoch": 3 }, { "type": "loss", "content": 0.0005024011479690671, "timestamp": "2025-09-10 02:33:18.733329", "step": 7256, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.762432", "step": 7256, "epoch": 3 }, { "type": "loss", "content": 0.00013165673590265214, "timestamp": "2025-09-10 02:33:18.764238", "step": 7257, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.794587", "step": 7257, "epoch": 3 }, { "type": "loss", "content": 0.00011971175990765914, "timestamp": "2025-09-10 02:33:18.796424", "step": 7258, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:18.832080", "step": 7258, "epoch": 3 }, { "type": "loss", "content": 0.00018842818099074066, "timestamp": "2025-09-10 02:33:18.833896", "step": 7259, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.863340", "step": 7259, "epoch": 3 }, { "type": "loss", "content": 0.0001055440297932364, "timestamp": "2025-09-10 02:33:18.886684", "step": 7260, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.915970", "step": 7260, "epoch": 3 }, { "type": "loss", "content": 0.0024492177180945873, "timestamp": "2025-09-10 02:33:18.917834", "step": 7261, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:18.946317", "step": 7261, "epoch": 3 }, { "type": "loss", "content": 0.0010057402541860938, "timestamp": "2025-09-10 02:33:18.948411", "step": 7262, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:18.977942", "step": 7262, "epoch": 3 }, { "type": "loss", "content": 0.000282501510810107, "timestamp": "2025-09-10 02:33:18.979915", "step": 7263, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.008539", "step": 7263, "epoch": 3 }, { "type": "loss", "content": 0.00039048882899805903, "timestamp": "2025-09-10 02:33:19.032076", "step": 7264, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.061141", "step": 7264, "epoch": 3 }, { "type": "loss", "content": 0.00016508702537976205, "timestamp": "2025-09-10 02:33:19.063093", "step": 7265, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.091554", "step": 7265, "epoch": 3 }, { "type": "loss", "content": 0.0063108946196734905, "timestamp": "2025-09-10 02:33:19.093607", "step": 7266, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.122885", "step": 7266, "epoch": 3 }, { "type": "loss", "content": 0.0014061091933399439, "timestamp": "2025-09-10 02:33:19.124818", "step": 7267, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.153332", "step": 7267, "epoch": 3 }, { "type": "loss", "content": 0.00014290912076830864, "timestamp": "2025-09-10 02:33:19.176671", "step": 7268, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:19.205694", "step": 7268, "epoch": 3 }, { "type": "loss", "content": 0.0021674942690879107, "timestamp": "2025-09-10 02:33:19.207573", "step": 7269, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.236012", "step": 7269, "epoch": 3 }, { "type": "loss", "content": 4.766025813296437e-05, "timestamp": "2025-09-10 02:33:19.237925", "step": 7270, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.266853", "step": 7270, "epoch": 3 }, { "type": "loss", "content": 0.00015322669059969485, "timestamp": "2025-09-10 02:33:19.268882", "step": 7271, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 80 ], "flops": 2373281365952 }, "timestamp": "2025-09-10 02:33:19.297662", "step": 7271, "epoch": 3 }, { "type": "loss", "content": 0.010051853954792023, "timestamp": "2025-09-10 02:33:19.321165", "step": 7272, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.350138", "step": 7272, "epoch": 3 }, { "type": "loss", "content": 0.0027771706227213144, "timestamp": "2025-09-10 02:33:19.351844", "step": 7273, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.380153", "step": 7273, "epoch": 3 }, { "type": "loss", "content": 0.00017319263133686036, "timestamp": "2025-09-10 02:33:19.381912", "step": 7274, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.410927", "step": 7274, "epoch": 3 }, { "type": "loss", "content": 0.008278073742985725, "timestamp": "2025-09-10 02:33:19.412828", "step": 7275, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.442168", "step": 7275, "epoch": 3 }, { "type": "loss", "content": 0.0010432031704112887, "timestamp": "2025-09-10 02:33:19.465498", "step": 7276, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 112 ], "flops": 3322488817984 }, "timestamp": "2025-09-10 02:33:19.494858", "step": 7276, "epoch": 3 }, { "type": "loss", "content": 0.0011932385386899114, "timestamp": "2025-09-10 02:33:19.496498", "step": 7277, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.525258", "step": 7277, "epoch": 3 }, { "type": "loss", "content": 0.00030725193209946156, "timestamp": "2025-09-10 02:33:19.526737", "step": 7278, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.555431", "step": 7278, "epoch": 3 }, { "type": "loss", "content": 0.00044731449452228844, "timestamp": "2025-09-10 02:33:19.557418", "step": 7279, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.586302", "step": 7279, "epoch": 3 }, { "type": "loss", "content": 0.0009866936597973108, "timestamp": "2025-09-10 02:33:19.609848", "step": 7280, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.638582", "step": 7280, "epoch": 3 }, { "type": "loss", "content": 0.00012599120964296162, "timestamp": "2025-09-10 02:33:19.640585", "step": 7281, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.669761", "step": 7281, "epoch": 3 }, { "type": "loss", "content": 0.000897696299944073, "timestamp": "2025-09-10 02:33:19.671547", "step": 7282, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.700172", "step": 7282, "epoch": 3 }, { "type": "loss", "content": 5.4524887673323974e-05, "timestamp": "2025-09-10 02:33:19.702176", "step": 7283, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.730697", "step": 7283, "epoch": 3 }, { "type": "loss", "content": 6.54960676911287e-05, "timestamp": "2025-09-10 02:33:19.754038", "step": 7284, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.782691", "step": 7284, "epoch": 3 }, { "type": "loss", "content": 7.324916805373505e-05, "timestamp": "2025-09-10 02:33:19.784657", "step": 7285, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.813236", "step": 7285, "epoch": 3 }, { "type": "loss", "content": 0.001871999935247004, "timestamp": "2025-09-10 02:33:19.815186", "step": 7286, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.844561", "step": 7286, "epoch": 3 }, { "type": "loss", "content": 0.0006715627387166023, "timestamp": "2025-09-10 02:33:19.846524", "step": 7287, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.875929", "step": 7287, "epoch": 3 }, { "type": "loss", "content": 0.0017241544555872679, "timestamp": "2025-09-10 02:33:19.899361", "step": 7288, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.928722", "step": 7288, "epoch": 3 }, { "type": "loss", "content": 7.862582424422726e-05, "timestamp": "2025-09-10 02:33:19.930836", "step": 7289, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.959634", "step": 7289, "epoch": 3 }, { "type": "loss", "content": 9.589677210897207e-05, "timestamp": "2025-09-10 02:33:19.961331", "step": 7290, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:19.990243", "step": 7290, "epoch": 3 }, { "type": "loss", "content": 0.000171039835549891, "timestamp": "2025-09-10 02:33:19.992001", "step": 7291, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:20.021008", "step": 7291, "epoch": 3 }, { "type": "loss", "content": 0.0294545479118824, "timestamp": "2025-09-10 02:33:20.044219", "step": 7292, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:20.073064", "step": 7292, "epoch": 3 }, { "type": "loss", "content": 0.0008927856688387692, "timestamp": "2025-09-10 02:33:20.074867", "step": 7293, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:20.103592", "step": 7293, "epoch": 3 }, { "type": "loss", "content": 5.344643795979209e-05, "timestamp": "2025-09-10 02:33:20.105201", "step": 7294, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:20.133819", "step": 7294, "epoch": 3 }, { "type": "loss", "content": 0.010886363685131073, "timestamp": "2025-09-10 02:33:20.135483", "step": 7295, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:20.164693", "step": 7295, "epoch": 3 }, { "type": "loss", "content": 6.454643153119832e-05, "timestamp": "2025-09-10 02:33:20.188123", "step": 7296, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:33:22.036152", "step": 7296, "epoch": 3 }, { "type": "pplx", "content": 2553532.4689230984, "timestamp": "2025-09-10 02:33:22.037804", "step": 7296, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.065870", "step": 7296, "epoch": 3 }, { "type": "loss", "content": 0.0003278447256889194, "timestamp": "2025-09-10 02:33:22.067523", "step": 7297, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.096448", "step": 7297, "epoch": 3 }, { "type": "loss", "content": 0.0003597289905883372, "timestamp": "2025-09-10 02:33:22.098274", "step": 7298, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.126866", "step": 7298, "epoch": 3 }, { "type": "loss", "content": 0.00020632542145904154, "timestamp": "2025-09-10 02:33:22.128565", "step": 7299, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.157032", "step": 7299, "epoch": 3 }, { "type": "loss", "content": 0.0006207999540492892, "timestamp": "2025-09-10 02:33:22.180334", "step": 7300, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.212634", "step": 7300, "epoch": 3 }, { "type": "loss", "content": 0.00022598991927225143, "timestamp": "2025-09-10 02:33:22.214539", "step": 7301, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.247024", "step": 7301, "epoch": 3 }, { "type": "loss", "content": 9.620003402233124e-05, "timestamp": "2025-09-10 02:33:22.248826", "step": 7302, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.287957", "step": 7302, "epoch": 3 }, { "type": "loss", "content": 0.0012993158306926489, "timestamp": "2025-09-10 02:33:22.289807", "step": 7303, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.330945", "step": 7303, "epoch": 3 }, { "type": "loss", "content": 0.012171934358775616, "timestamp": "2025-09-10 02:33:22.355472", "step": 7304, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.393027", "step": 7304, "epoch": 3 }, { "type": "loss", "content": 0.00029418981284834445, "timestamp": "2025-09-10 02:33:22.394907", "step": 7305, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.435429", "step": 7305, "epoch": 3 }, { "type": "loss", "content": 0.0052917273715138435, "timestamp": "2025-09-10 02:33:22.437309", "step": 7306, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 4, 96 ], "flops": 2847885091968 }, "timestamp": "2025-09-10 02:33:22.474407", "step": 7306, "epoch": 3 }, { "type": "loss", "content": 8.426891872659326e-05, "timestamp": "2025-09-10 02:33:22.476128", "step": 7307, "epoch": 3 }, { "type": "flops", "content": { "type": "train", "batch_dim": [ 1, 80 ], "flops": 593517404912 }, "timestamp": "2025-09-10 02:33:22.506700", "step": 7307, "epoch": 3 }, { "type": "loss", "content": 1.3720730748900678e-05, "timestamp": "2025-09-10 02:33:22.530117", "step": 7308, "epoch": 3 }, { "type": "flops", "content": [ { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 48 ], "batch_size": 8, "flops": 949202279808 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 80 ], "batch_size": 8, "flops": 1582003754624 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 96 ], "batch_size": 8, "flops": 1898404492032 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 8, 64 ], "batch_size": 8, "flops": 1265603017216 }, { "type": "perplexity", "in_batch_dim": [ 5, 80 ], "batch_size": 8, "flops": 1582003754624 } ], "timestamp": "2025-09-10 02:33:24.407419", "step": 7308, "epoch": 3 }, { "type": "pplx", "content": 2536462.915071921, "timestamp": "2025-09-10 02:33:24.409286", "step": 7308, "epoch": 3 }, { "type": "best_pplx", "content": 2010640.6963496492, "timestamp": "2025-09-10 02:33:24.410280", "step": 7308, "epoch": 3 }, { "type": "best_step", "content": 5016, "timestamp": "2025-09-10 02:33:24.411283", "step": 7308, "epoch": 3 }, { "type": "total_pplx_flops", "content": 9808423376665600, "timestamp": "2025-09-10 02:33:24.412339", "step": 7308, "epoch": 3 }, { "type": "total_train_flops", "content": 20896705064436048, "timestamp": "2025-09-10 02:33:24.413721", "step": 7308, "epoch": 3 } ], "best_evals": { "pplx": { "score": 2010640.6963496492, "step": 5016 }, "rougel": { "precision": 0.7141687141687142, "recall": 0.7141687141687142, "fmeasure": 0.7141687141687142 } } }