| { | |
| "train/loss": 2.5625, | |
| "train/contrastive": 2.46875, | |
| "train/recons_loss": 0.55078125, | |
| "train/balance_loss": 3.859375, | |
| "train/balance_loss_contrastive": 2.84375, | |
| "train/balance_loss_recons": 1.015625, | |
| "train/contrastive_std": 3.390625, | |
| "train/recons_std": 0.064453125, | |
| "train/contrastive_min": 0.08349609375, | |
| "train/contrastive_max": 7.1875, | |
| "train/recons_min": 0.470703125, | |
| "train/recons_max": 0.64453125, | |
| "train/Qwen3_0.6B_layer_2": 0.609375, | |
| "train/Qwen3_0.6B_layer_4": 0.53515625, | |
| "train/Qwen3_1.7B_layer_2": 0.5078125, | |
| "train/Qwen3_1.7B_layer_4": 0.64453125, | |
| "train/Qwen3_4B_layer_2": 0.470703125, | |
| "train/Qwen3_4B_layer_4": 0.546875, | |
| "train/contrastives": null, | |
| "train/epoch": 1, | |
| "train/n_tokens": 9999360, | |
| "train/step": 9765 | |
| } |