bkjeon committed on Oct 9

Commit

639a5f8

verified ·

1 Parent(s): 716c900

Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

.gitattributes +1 -0
checkpoint-1000/config.json +64 -0
checkpoint-1000/experiment_cfg/metadata.json +259 -0
checkpoint-1000/model-00001-of-00002.safetensors +3 -0
checkpoint-1000/model-00002-of-00002.safetensors +3 -0
checkpoint-1000/model.safetensors.index.json +0 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/trainer_state.json +734 -0
config.json +64 -0
eval_plot.png +3 -0
experiment_cfg/metadata.json +259 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
trainer_state.json +743 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+eval_plot.png filter=lfs diff=lfs merge=lfs -text

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

checkpoint-1000/experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,259 @@

+{
+    "oxe_droid": {
+        "statistics": {
+            "state": {
+                "eef_position": {
+                    "max": [
+                        0.46507203578948975,
+                        1.447200059890747,
+                        0.290057897567749
+                    ],
+                    "min": [
+                        -0.40590497851371765,
+                        -0.024546854197978973,
+                        -0.22337864339351654
+                    ],
+                    "mean": [
+                        0.05521393194794655,
+                        0.6310628056526184,
+                        -0.01189372967928648
+                    ],
+                    "std": [
+                        0.18417152762413025,
+                        0.2100915163755417,
+                        0.0770791620016098
+                    ],
+                    "q01": [
+                        -0.37289209306240084,
+                        0.14116937458515166,
+                        -0.1820696112513542
+                    ],
+                    "q99": [
+                        0.39412812441587447,
+                        1.09063316822052,
+                        0.1658253240585327
+                    ]
+                },
+                "eef_rotation": {
+                    "max": [
+                        -0.35259532928466797,
+                        0.8778502345085144,
+                        2.965937376022339
+                    ],
+                    "min": [
+                        -2.3446121215820312,
+                        -0.47837626934051514,
+                        1.515358328819275
+                    ],
+                    "mean": [
+                        -1.6130940914154053,
+                        -0.06260889768600464,
+                        2.2893757820129395
+                    ],
+                    "std": [
+                        0.3057422637939453,
+                        0.13727501034736633,
+                        0.18679431080818176
+                    ],
+                    "q01": [
+                        -2.151788368225098,
+                        -0.348803762793541,
+                        1.8247082424163819
+                    ],
+                    "q99": [
+                        -0.8719659841060661,
+                        0.32717462211847265,
+                        2.695855288505554
+                    ]
+                },
+                "gripper_position": {
+                    "max": [
+                        1.0660839080810547
+                    ],
+                    "min": [
+                        -0.862152636051178
+                    ],
+                    "mean": [
+                        0.08788274973630905
+                    ],
+                    "std": [
+                        0.2687181830406189
+                    ],
+                    "q01": [
+                        -0.6035337620973587
+                    ],
+                    "q99": [
+                        0.7679916465282439
+                    ]
+                }
+            },
+            "action": {
+                "eef_position_delta": {
+                    "max": [
+                        0.5925613045692444,
+                        0.8723994493484497,
+                        0.69416743516922
+                    ],
+                    "min": [
+                        -0.960589587688446,
+                        -0.8221790194511414,
+                        -0.8575763702392578
+                    ],
+                    "mean": [
+                        0.03402156010270119,
+                        -0.007893561385571957,
+                        0.0033362761605530977
+                    ],
+                    "std": [
+                        0.19854718446731567,
+                        0.15367349982261658,
+                        0.14636090397834778
+                    ],
+                    "q01": [
+                        -0.47471584677696227,
+                        -0.4062542155385018,
+                        -0.26967047452926635
+                    ],
+                    "q99": [
+                        0.4079366648197173,
+                        0.34532205790281273,
+                        0.4184985920786849
+                    ]
+                },
+                "eef_rotation_delta": {
+                    "max": [
+                        0.8240683674812317,
+                        0.7224147915840149,
+                        0.8612414598464966
+                    ],
+                    "min": [
+                        -0.8100513815879822,
+                        -0.8826454877853394,
+                        -0.9472323656082153
+                    ],
+                    "mean": [
+                        0.0014922608388587832,
+                        -0.029764438048005104,
+                        0.006220718380063772
+                    ],
+                    "std": [
+                        0.14293360710144043,
+                        0.16134193539619446,
+                        0.2356378585100174
+                    ],
+                    "q01": [
+                        -0.3907526931166649,
+                        -0.4691345453262329,
+                        -0.5505707603693009
+                    ],
+                    "q99": [
+                        0.37015672087669316,
+                        0.39511197239160506,
+                        0.573674650788307
+                    ]
+                },
+                "gripper_position": {
+                    "max": [
+                        1.0
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        0.6008301973342896
+                    ],
+                    "std": [
+                        0.3934294879436493
+                    ],
+                    "q01": [
+                        0.07158590108156204
+                    ],
+                    "q99": [
+                        1.0
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "exterior_image_1": {
+                    "resolution": [
+                        320,
+                        180
+                    ],
+                    "channels": 3,
+                    "fps": 15.0
+                },
+                "exterior_image_2": {
+                    "resolution": [
+                        320,
+                        180
+                    ],
+                    "channels": 3,
+                    "fps": 15.0
+                },
+                "wrist_image": {
+                    "resolution": [
+                        320,
+                        180
+                    ],
+                    "channels": 3,
+                    "fps": 15.0
+                }
+            },
+            "state": {
+                "eef_position": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "eef_rotation": {
+                    "absolute": true,
+                    "rotation_type": "euler_angles_rpy",
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "gripper_position": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "eef_position_delta": {
+                    "absolute": false,
+                    "rotation_type": null,
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "eef_rotation_delta": {
+                    "absolute": false,
+                    "rotation_type": "axis_angle",
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "gripper_position": {
+                    "absolute": false,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "oxe_droid"
+    }
+}

checkpoint-1000/model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da5487b7f3546322ddae34bb07dccab41feae6c722565237dc73a0f96861062b
+size 4999367032

checkpoint-1000/model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1f2def2be205cda90f1f730f18164dec31b074b0c482d18949dc4a78c32cce4
+size 2586705312

checkpoint-1000/model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1db7f95cbcb58e11b8baef3025310e916ec907e6496a5d8090505b38423a179
+size 8550720062

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac4c57336df09ab4aef5e91d6b2a5b850e4edf3cbcfc905a25d2fff14e5301d0
+size 14244

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4077036d99500a708f700f75da24d51b5300e184ad35fda49dc5a4df5596cca2
+size 1064

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,734 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9267822736030829,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.019267822736030827,
+      "grad_norm": 1.7207869291305542,
+      "learning_rate": 1.8e-05,
+      "loss": 0.2916,
+      "step": 10
+    },
+    {
+      "epoch": 0.038535645472061654,
+      "grad_norm": 0.542586088180542,
+      "learning_rate": 3.8e-05,
+      "loss": 0.1243,
+      "step": 20
+    },
+    {
+      "epoch": 0.057803468208092484,
+      "grad_norm": 0.7696737051010132,
+      "learning_rate": 5.8e-05,
+      "loss": 0.1138,
+      "step": 30
+    },
+    {
+      "epoch": 0.07707129094412331,
+      "grad_norm": 0.524152398109436,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 0.1041,
+      "step": 40
+    },
+    {
+      "epoch": 0.09633911368015415,
+      "grad_norm": 0.4883207380771637,
+      "learning_rate": 9.8e-05,
+      "loss": 0.0938,
+      "step": 50
+    },
+    {
+      "epoch": 0.11560693641618497,
+      "grad_norm": 0.6807201504707336,
+      "learning_rate": 9.997785653888835e-05,
+      "loss": 0.1036,
+      "step": 60
+    },
+    {
+      "epoch": 0.1348747591522158,
+      "grad_norm": 0.5879040360450745,
+      "learning_rate": 9.990133642141359e-05,
+      "loss": 0.0913,
+      "step": 70
+    },
+    {
+      "epoch": 0.15414258188824662,
+      "grad_norm": 0.2884378135204315,
+      "learning_rate": 9.977024992520602e-05,
+      "loss": 0.0937,
+      "step": 80
+    },
+    {
+      "epoch": 0.17341040462427745,
+      "grad_norm": 0.31237179040908813,
+      "learning_rate": 9.95847403914247e-05,
+      "loss": 0.0818,
+      "step": 90
+    },
+    {
+      "epoch": 0.1926782273603083,
+      "grad_norm": 0.5337116718292236,
+      "learning_rate": 9.934501067202117e-05,
+      "loss": 0.0871,
+      "step": 100
+    },
+    {
+      "epoch": 0.2119460500963391,
+      "grad_norm": 0.35781776905059814,
+      "learning_rate": 9.905132290792394e-05,
+      "loss": 0.0813,
+      "step": 110
+    },
+    {
+      "epoch": 0.23121387283236994,
+      "grad_norm": 0.41506218910217285,
+      "learning_rate": 9.870399824239117e-05,
+      "loss": 0.0807,
+      "step": 120
+    },
+    {
+      "epoch": 0.2504816955684008,
+      "grad_norm": 0.2967628836631775,
+      "learning_rate": 9.830341646984521e-05,
+      "loss": 0.0824,
+      "step": 130
+    },
+    {
+      "epoch": 0.2697495183044316,
+      "grad_norm": 0.31782740354537964,
+      "learning_rate": 9.785001562057309e-05,
+      "loss": 0.0756,
+      "step": 140
+    },
+    {
+      "epoch": 0.28901734104046245,
+      "grad_norm": 0.34691065549850464,
+      "learning_rate": 9.734429148174675e-05,
+      "loss": 0.084,
+      "step": 150
+    },
+    {
+      "epoch": 0.30828516377649323,
+      "grad_norm": 0.48853012919425964,
+      "learning_rate": 9.6786797055287e-05,
+      "loss": 0.0832,
+      "step": 160
+    },
+    {
+      "epoch": 0.32755298651252407,
+      "grad_norm": 0.39761826395988464,
+      "learning_rate": 9.617814195316411e-05,
+      "loss": 0.0773,
+      "step": 170
+    },
+    {
+      "epoch": 0.3468208092485549,
+      "grad_norm": 0.41890355944633484,
+      "learning_rate": 9.551899173079607e-05,
+      "loss": 0.072,
+      "step": 180
+    },
+    {
+      "epoch": 0.36608863198458574,
+      "grad_norm": 0.5497244596481323,
+      "learning_rate": 9.481006715927351e-05,
+      "loss": 0.0733,
+      "step": 190
+    },
+    {
+      "epoch": 0.3853564547206166,
+      "grad_norm": 0.28932520747184753,
+      "learning_rate": 9.405214343720707e-05,
+      "loss": 0.0814,
+      "step": 200
+    },
+    {
+      "epoch": 0.4046242774566474,
+      "grad_norm": 0.5443249344825745,
+      "learning_rate": 9.32460493430591e-05,
+      "loss": 0.0789,
+      "step": 210
+    },
+    {
+      "epoch": 0.4238921001926782,
+      "grad_norm": 0.5669806003570557,
+      "learning_rate": 9.239266632888659e-05,
+      "loss": 0.0717,
+      "step": 220
+    },
+    {
+      "epoch": 0.44315992292870904,
+      "grad_norm": 0.3382496237754822,
+      "learning_rate": 9.14929275564863e-05,
+      "loss": 0.0714,
+      "step": 230
+    },
+    {
+      "epoch": 0.4624277456647399,
+      "grad_norm": 0.47498270869255066,
+      "learning_rate": 9.0547816876996e-05,
+      "loss": 0.0691,
+      "step": 240
+    },
+    {
+      "epoch": 0.4816955684007707,
+      "grad_norm": 0.4482809007167816,
+      "learning_rate": 8.955836775506776e-05,
+      "loss": 0.0694,
+      "step": 250
+    },
+    {
+      "epoch": 0.5009633911368016,
+      "grad_norm": 0.5501825213432312,
+      "learning_rate": 8.852566213878947e-05,
+      "loss": 0.0686,
+      "step": 260
+    },
+    {
+      "epoch": 0.5202312138728323,
+      "grad_norm": 0.3987138867378235,
+      "learning_rate": 8.745082927659047e-05,
+      "loss": 0.078,
+      "step": 270
+    },
+    {
+      "epoch": 0.5394990366088632,
+      "grad_norm": 0.28167715668678284,
+      "learning_rate": 8.633504448242505e-05,
+      "loss": 0.0758,
+      "step": 280
+    },
+    {
+      "epoch": 0.558766859344894,
+      "grad_norm": 0.24536319077014923,
+      "learning_rate": 8.517952785058385e-05,
+      "loss": 0.0747,
+      "step": 290
+    },
+    {
+      "epoch": 0.5780346820809249,
+      "grad_norm": 0.2790259122848511,
+      "learning_rate": 8.398554292153866e-05,
+      "loss": 0.0681,
+      "step": 300
+    },
+    {
+      "epoch": 0.5973025048169557,
+      "grad_norm": 0.4405696988105774,
+      "learning_rate": 8.275439530027948e-05,
+      "loss": 0.0713,
+      "step": 310
+    },
+    {
+      "epoch": 0.6165703275529865,
+      "grad_norm": 0.3930000066757202,
+      "learning_rate": 8.148743122865463e-05,
+      "loss": 0.0633,
+      "step": 320
+    },
+    {
+      "epoch": 0.6358381502890174,
+      "grad_norm": 0.39871180057525635,
+      "learning_rate": 8.018603611327504e-05,
+      "loss": 0.0735,
+      "step": 330
+    },
+    {
+      "epoch": 0.6551059730250481,
+      "grad_norm": 0.43595343828201294,
+      "learning_rate": 7.88516330105925e-05,
+      "loss": 0.0651,
+      "step": 340
+    },
+    {
+      "epoch": 0.674373795761079,
+      "grad_norm": 0.3049931824207306,
+      "learning_rate": 7.748568107080832e-05,
+      "loss": 0.0641,
+      "step": 350
+    },
+    {
+      "epoch": 0.6936416184971098,
+      "grad_norm": 0.2661554515361786,
+      "learning_rate": 7.608967394231387e-05,
+      "loss": 0.0615,
+      "step": 360
+    },
+    {
+      "epoch": 0.7129094412331407,
+      "grad_norm": 0.23182271420955658,
+      "learning_rate": 7.466513813840825e-05,
+      "loss": 0.0641,
+      "step": 370
+    },
+    {
+      "epoch": 0.7321772639691715,
+      "grad_norm": 0.36455002427101135,
+      "learning_rate": 7.32136313680782e-05,
+      "loss": 0.0694,
+      "step": 380
+    },
+    {
+      "epoch": 0.7514450867052023,
+      "grad_norm": 0.37729915976524353,
+      "learning_rate": 7.173674083266624e-05,
+      "loss": 0.0657,
+      "step": 390
+    },
+    {
+      "epoch": 0.7707129094412332,
+      "grad_norm": 0.3024880290031433,
+      "learning_rate": 7.023608149028937e-05,
+      "loss": 0.0595,
+      "step": 400
+    },
+    {
+      "epoch": 0.789980732177264,
+      "grad_norm": 0.31104758381843567,
+      "learning_rate": 6.871329428990602e-05,
+      "loss": 0.0681,
+      "step": 410
+    },
+    {
+      "epoch": 0.8092485549132948,
+      "grad_norm": 0.350875586271286,
+      "learning_rate": 6.71700443769625e-05,
+      "loss": 0.061,
+      "step": 420
+    },
+    {
+      "epoch": 0.8285163776493256,
+      "grad_norm": 0.2729007303714752,
+      "learning_rate": 6.56080192725808e-05,
+      "loss": 0.0666,
+      "step": 430
+    },
+    {
+      "epoch": 0.8477842003853564,
+      "grad_norm": 0.29519274830818176,
+      "learning_rate": 6.402892702827916e-05,
+      "loss": 0.0617,
+      "step": 440
+    },
+    {
+      "epoch": 0.8670520231213873,
+      "grad_norm": 0.41105949878692627,
+      "learning_rate": 6.243449435824276e-05,
+      "loss": 0.0635,
+      "step": 450
+    },
+    {
+      "epoch": 0.8863198458574181,
+      "grad_norm": 0.39932650327682495,
+      "learning_rate": 6.0826464751186994e-05,
+      "loss": 0.0668,
+      "step": 460
+    },
+    {
+      "epoch": 0.905587668593449,
+      "grad_norm": 0.36617910861968994,
+      "learning_rate": 5.9206596563878357e-05,
+      "loss": 0.0642,
+      "step": 470
+    },
+    {
+      "epoch": 0.9248554913294798,
+      "grad_norm": 0.3059275150299072,
+      "learning_rate": 5.757666109839702e-05,
+      "loss": 0.0639,
+      "step": 480
+    },
+    {
+      "epoch": 0.9441233140655106,
+      "grad_norm": 0.3001484274864197,
+      "learning_rate": 5.5938440665244006e-05,
+      "loss": 0.0581,
+      "step": 490
+    },
+    {
+      "epoch": 0.9633911368015414,
+      "grad_norm": 0.3571004569530487,
+      "learning_rate": 5.4293726634410855e-05,
+      "loss": 0.0558,
+      "step": 500
+    },
+    {
+      "epoch": 0.9826589595375722,
+      "grad_norm": 0.3813907206058502,
+      "learning_rate": 5.264431747654284e-05,
+      "loss": 0.0618,
+      "step": 510
+    },
+    {
+      "epoch": 1.001926782273603,
+      "grad_norm": 0.29777514934539795,
+      "learning_rate": 5.0992016796337686e-05,
+      "loss": 0.0578,
+      "step": 520
+    },
+    {
+      "epoch": 1.0211946050096339,
+      "grad_norm": 0.28990840911865234,
+      "learning_rate": 4.93386313603304e-05,
+      "loss": 0.0613,
+      "step": 530
+    },
+    {
+      "epoch": 1.0404624277456647,
+      "grad_norm": 0.43356871604919434,
+      "learning_rate": 4.7685969121220456e-05,
+      "loss": 0.057,
+      "step": 540
+    },
+    {
+      "epoch": 1.0597302504816957,
+      "grad_norm": 0.2468729466199875,
+      "learning_rate": 4.60358372409022e-05,
+      "loss": 0.053,
+      "step": 550
+    },
+    {
+      "epoch": 1.0789980732177264,
+      "grad_norm": 0.33707353472709656,
+      "learning_rate": 4.439004011435979e-05,
+      "loss": 0.0523,
+      "step": 560
+    },
+    {
+      "epoch": 1.0982658959537572,
+      "grad_norm": 0.3425613343715668,
+      "learning_rate": 4.275037739658771e-05,
+      "loss": 0.0533,
+      "step": 570
+    },
+    {
+      "epoch": 1.117533718689788,
+      "grad_norm": 0.21212391555309296,
+      "learning_rate": 4.111864203469457e-05,
+      "loss": 0.0506,
+      "step": 580
+    },
+    {
+      "epoch": 1.1368015414258188,
+      "grad_norm": 0.2778816223144531,
+      "learning_rate": 3.949661830734172e-05,
+      "loss": 0.0575,
+      "step": 590
+    },
+    {
+      "epoch": 1.1560693641618498,
+      "grad_norm": 0.2917283773422241,
+      "learning_rate": 3.788607987366069e-05,
+      "loss": 0.0571,
+      "step": 600
+    },
+    {
+      "epoch": 1.1753371868978806,
+      "grad_norm": 0.27486225962638855,
+      "learning_rate": 3.628878783378302e-05,
+      "loss": 0.056,
+      "step": 610
+    },
+    {
+      "epoch": 1.1946050096339114,
+      "grad_norm": 0.40446701645851135,
+      "learning_rate": 3.470648880310313e-05,
+      "loss": 0.0529,
+      "step": 620
+    },
+    {
+      "epoch": 1.2138728323699421,
+      "grad_norm": 0.25892701745033264,
+      "learning_rate": 3.3140913002379995e-05,
+      "loss": 0.0504,
+      "step": 630
+    },
+    {
+      "epoch": 1.2331406551059731,
+      "grad_norm": 0.26697343587875366,
+      "learning_rate": 3.1593772365766105e-05,
+      "loss": 0.0509,
+      "step": 640
+    },
+    {
+      "epoch": 1.252408477842004,
+      "grad_norm": 0.2691023051738739,
+      "learning_rate": 3.006675866883275e-05,
+      "loss": 0.0576,
+      "step": 650
+    },
+    {
+      "epoch": 1.2716763005780347,
+      "grad_norm": 0.2726045250892639,
+      "learning_rate": 2.8561541678638142e-05,
+      "loss": 0.051,
+      "step": 660
+    },
+    {
+      "epoch": 1.2909441233140655,
+      "grad_norm": 0.3852327764034271,
+      "learning_rate": 2.707976732786166e-05,
+      "loss": 0.0535,
+      "step": 670
+    },
+    {
+      "epoch": 1.3102119460500963,
+      "grad_norm": 0.2920765280723572,
+      "learning_rate": 2.562305591500069e-05,
+      "loss": 0.0492,
+      "step": 680
+    },
+    {
+      "epoch": 1.3294797687861273,
+      "grad_norm": 0.1913856863975525,
+      "learning_rate": 2.419300033259798e-05,
+      "loss": 0.0507,
+      "step": 690
+    },
+    {
+      "epoch": 1.348747591522158,
+      "grad_norm": 0.29561716318130493,
+      "learning_rate": 2.279116432543705e-05,
+      "loss": 0.0499,
+      "step": 700
+    },
+    {
+      "epoch": 1.3680154142581888,
+      "grad_norm": 0.4290018379688263,
+      "learning_rate": 2.1419080780610123e-05,
+      "loss": 0.0571,
+      "step": 710
+    },
+    {
+      "epoch": 1.3872832369942196,
+      "grad_norm": 0.2543991506099701,
+      "learning_rate": 2.0078250051328784e-05,
+      "loss": 0.0524,
+      "step": 720
+    },
+    {
+      "epoch": 1.4065510597302504,
+      "grad_norm": 0.2487826943397522,
+      "learning_rate": 1.877013831630961e-05,
+      "loss": 0.0539,
+      "step": 730
+    },
+    {
+      "epoch": 1.4258188824662814,
+      "grad_norm": 0.3550814390182495,
+      "learning_rate": 1.749617597652934e-05,
+      "loss": 0.0436,
+      "step": 740
+    },
+    {
+      "epoch": 1.4450867052023122,
+      "grad_norm": 0.2873481810092926,
+      "learning_rate": 1.62577560911024e-05,
+      "loss": 0.0463,
+      "step": 750
+    },
+    {
+      "epoch": 1.464354527938343,
+      "grad_norm": 0.22322338819503784,
+      "learning_rate": 1.5056232853991209e-05,
+      "loss": 0.0427,
+      "step": 760
+    },
+    {
+      "epoch": 1.4836223506743738,
+      "grad_norm": 0.23233197629451752,
+      "learning_rate": 1.389292011321498e-05,
+      "loss": 0.0427,
+      "step": 770
+    },
+    {
+      "epoch": 1.5028901734104045,
+      "grad_norm": 0.25211092829704285,
+      "learning_rate": 1.2769089934176126e-05,
+      "loss": 0.0456,
+      "step": 780
+    },
+    {
+      "epoch": 1.5221579961464355,
+      "grad_norm": 0.19579973816871643,
+      "learning_rate": 1.1685971208675539e-05,
+      "loss": 0.0447,
+      "step": 790
+    },
+    {
+      "epoch": 1.5414258188824663,
+      "grad_norm": 0.2609504461288452,
+      "learning_rate": 1.0644748311137376e-05,
+      "loss": 0.0467,
+      "step": 800
+    },
+    {
+      "epoch": 1.560693641618497,
+      "grad_norm": 0.23123040795326233,
+      "learning_rate": 9.646559803512994e-06,
+      "loss": 0.0505,
+      "step": 810
+    },
+    {
+      "epoch": 1.579961464354528,
+      "grad_norm": 0.1902703195810318,
+      "learning_rate": 8.692497190280224e-06,
+      "loss": 0.0463,
+      "step": 820
+    },
+    {
+      "epoch": 1.5992292870905587,
+      "grad_norm": 0.30633774399757385,
+      "learning_rate": 7.783603724899257e-06,
+      "loss": 0.0465,
+      "step": 830
+    },
+    {
+      "epoch": 1.6184971098265897,
+      "grad_norm": 0.24036064743995667,
+      "learning_rate": 6.92087326903022e-06,
+      "loss": 0.0435,
+      "step": 840
+    },
+    {
+      "epoch": 1.6377649325626205,
+      "grad_norm": 0.20757320523262024,
+      "learning_rate": 6.1052492057601275e-06,
+      "loss": 0.0505,
+      "step": 850
+    },
+    {
+      "epoch": 1.6570327552986512,
+      "grad_norm": 0.3485427796840668,
+      "learning_rate": 5.337623408027293e-06,
+      "loss": 0.0441,
+      "step": 860
+    },
+    {
+      "epoch": 1.6763005780346822,
+      "grad_norm": 0.235531747341156,
+      "learning_rate": 4.618835263371396e-06,
+      "loss": 0.0464,
+      "step": 870
+    },
+    {
+      "epoch": 1.6955684007707128,
+      "grad_norm": 0.19758078455924988,
+      "learning_rate": 3.949670756075447e-06,
+      "loss": 0.0413,
+      "step": 880
+    },
+    {
+      "epoch": 1.7148362235067438,
+      "grad_norm": 0.21413107216358185,
+      "learning_rate": 3.3308616077036115e-06,
+      "loss": 0.0412,
+      "step": 890
+    },
+    {
+      "epoch": 1.7341040462427746,
+      "grad_norm": 0.26742419600486755,
+      "learning_rate": 2.7630844769743757e-06,
+      "loss": 0.0454,
+      "step": 900
+    },
+    {
+      "epoch": 1.7533718689788054,
+      "grad_norm": 0.22228221595287323,
+      "learning_rate": 2.2469602198441573e-06,
+      "loss": 0.0392,
+      "step": 910
+    },
+    {
+      "epoch": 1.7726396917148364,
+      "grad_norm": 0.2834679186344147,
+      "learning_rate": 1.7830532106104747e-06,
+      "loss": 0.0436,
+      "step": 920
+    },
+    {
+      "epoch": 1.791907514450867,
+      "grad_norm": 0.260470449924469,
+      "learning_rate": 1.3718707247769135e-06,
+      "loss": 0.0424,
+      "step": 930
+    },
+    {
+      "epoch": 1.811175337186898,
+      "grad_norm": 0.2204512655735016,
+      "learning_rate": 1.0138623843548078e-06,
+      "loss": 0.0393,
+      "step": 940
+    },
+    {
+      "epoch": 1.8304431599229287,
+      "grad_norm": 0.23786160349845886,
+      "learning_rate": 7.094196662081831e-07,
+      "loss": 0.0472,
+      "step": 950
+    },
+    {
+      "epoch": 1.8497109826589595,
+      "grad_norm": 0.18833257257938385,
+      "learning_rate": 4.5887547397955864e-07,
+      "loss": 0.0416,
+      "step": 960
+    },
+    {
+      "epoch": 1.8689788053949905,
+      "grad_norm": 0.20266398787498474,
+      "learning_rate": 2.625037740646763e-07,
+      "loss": 0.0426,
+      "step": 970
+    },
+    {
+      "epoch": 1.888246628131021,
+      "grad_norm": 0.2135821133852005,
+      "learning_rate": 1.2051929603428825e-07,
+      "loss": 0.0445,
+      "step": 980
+    },
+    {
+      "epoch": 1.907514450867052,
+      "grad_norm": 0.2030361145734787,
+      "learning_rate": 3.3077297830541584e-08,
+      "loss": 0.045,
+      "step": 990
+    },
+    {
+      "epoch": 1.9267822736030829,
+      "grad_norm": 0.31092095375061035,
+      "learning_rate": 2.7339599464326627e-10,
+      "loss": 0.0434,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

eval_plot.png ADDED Viewed

Git LFS Details

SHA256: e37cbf5a8e05623de6583072da6553fc8047372fb092b14ffbc0ac720b93f55e
Pointer size: 132 Bytes
Size of remote file: 1.35 MB

experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,259 @@

+{
+    "oxe_droid": {
+        "statistics": {
+            "state": {
+                "eef_position": {
+                    "max": [
+                        0.46507203578948975,
+                        1.447200059890747,
+                        0.290057897567749
+                    ],
+                    "min": [
+                        -0.40590497851371765,
+                        -0.024546854197978973,
+                        -0.22337864339351654
+                    ],
+                    "mean": [
+                        0.05521393194794655,
+                        0.6310628056526184,
+                        -0.01189372967928648
+                    ],
+                    "std": [
+                        0.18417152762413025,
+                        0.2100915163755417,
+                        0.0770791620016098
+                    ],
+                    "q01": [
+                        -0.37289209306240084,
+                        0.14116937458515166,
+                        -0.1820696112513542
+                    ],
+                    "q99": [
+                        0.39412812441587447,
+                        1.09063316822052,
+                        0.1658253240585327
+                    ]
+                },
+                "eef_rotation": {
+                    "max": [
+                        -0.35259532928466797,
+                        0.8778502345085144,
+                        2.965937376022339
+                    ],
+                    "min": [
+                        -2.3446121215820312,
+                        -0.47837626934051514,
+                        1.515358328819275
+                    ],
+                    "mean": [
+                        -1.6130940914154053,
+                        -0.06260889768600464,
+                        2.2893757820129395
+                    ],
+                    "std": [
+                        0.3057422637939453,
+                        0.13727501034736633,
+                        0.18679431080818176
+                    ],
+                    "q01": [
+                        -2.151788368225098,
+                        -0.348803762793541,
+                        1.8247082424163819
+                    ],
+                    "q99": [
+                        -0.8719659841060661,
+                        0.32717462211847265,
+                        2.695855288505554
+                    ]
+                },
+                "gripper_position": {
+                    "max": [
+                        1.0660839080810547
+                    ],
+                    "min": [
+                        -0.862152636051178
+                    ],
+                    "mean": [
+                        0.08788274973630905
+                    ],
+                    "std": [
+                        0.2687181830406189
+                    ],
+                    "q01": [
+                        -0.6035337620973587
+                    ],
+                    "q99": [
+                        0.7679916465282439
+                    ]
+                }
+            },
+            "action": {
+                "eef_position_delta": {
+                    "max": [
+                        0.5925613045692444,
+                        0.8723994493484497,
+                        0.69416743516922
+                    ],
+                    "min": [
+                        -0.960589587688446,
+                        -0.8221790194511414,
+                        -0.8575763702392578
+                    ],
+                    "mean": [
+                        0.03402156010270119,
+                        -0.007893561385571957,
+                        0.0033362761605530977
+                    ],
+                    "std": [
+                        0.19854718446731567,
+                        0.15367349982261658,
+                        0.14636090397834778
+                    ],
+                    "q01": [
+                        -0.47471584677696227,
+                        -0.4062542155385018,
+                        -0.26967047452926635
+                    ],
+                    "q99": [
+                        0.4079366648197173,
+                        0.34532205790281273,
+                        0.4184985920786849
+                    ]
+                },
+                "eef_rotation_delta": {
+                    "max": [
+                        0.8240683674812317,
+                        0.7224147915840149,
+                        0.8612414598464966
+                    ],
+                    "min": [
+                        -0.8100513815879822,
+                        -0.8826454877853394,
+                        -0.9472323656082153
+                    ],
+                    "mean": [
+                        0.0014922608388587832,
+                        -0.029764438048005104,
+                        0.006220718380063772
+                    ],
+                    "std": [
+                        0.14293360710144043,
+                        0.16134193539619446,
+                        0.2356378585100174
+                    ],
+                    "q01": [
+                        -0.3907526931166649,
+                        -0.4691345453262329,
+                        -0.5505707603693009
+                    ],
+                    "q99": [
+                        0.37015672087669316,
+                        0.39511197239160506,
+                        0.573674650788307
+                    ]
+                },
+                "gripper_position": {
+                    "max": [
+                        1.0
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        0.6008301973342896
+                    ],
+                    "std": [
+                        0.3934294879436493
+                    ],
+                    "q01": [
+                        0.07158590108156204
+                    ],
+                    "q99": [
+                        1.0
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "exterior_image_1": {
+                    "resolution": [
+                        320,
+                        180
+                    ],
+                    "channels": 3,
+                    "fps": 15.0
+                },
+                "exterior_image_2": {
+                    "resolution": [
+                        320,
+                        180
+                    ],
+                    "channels": 3,
+                    "fps": 15.0
+                },
+                "wrist_image": {
+                    "resolution": [
+                        320,
+                        180
+                    ],
+                    "channels": 3,
+                    "fps": 15.0
+                }
+            },
+            "state": {
+                "eef_position": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "eef_rotation": {
+                    "absolute": true,
+                    "rotation_type": "euler_angles_rpy",
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "gripper_position": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "eef_position_delta": {
+                    "absolute": false,
+                    "rotation_type": null,
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "eef_rotation_delta": {
+                    "absolute": false,
+                    "rotation_type": "axis_angle",
+                    "shape": [
+                        3
+                    ],
+                    "continuous": true
+                },
+                "gripper_position": {
+                    "absolute": false,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "oxe_droid"
+    }
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da5487b7f3546322ddae34bb07dccab41feae6c722565237dc73a0f96861062b
+size 4999367032

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1f2def2be205cda90f1f730f18164dec31b074b0c482d18949dc4a78c32cce4
+size 2586705312

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.9267822736030829,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.019267822736030827,
+      "grad_norm": 1.7207869291305542,
+      "learning_rate": 1.8e-05,
+      "loss": 0.2916,
+      "step": 10
+    },
+    {
+      "epoch": 0.038535645472061654,
+      "grad_norm": 0.542586088180542,
+      "learning_rate": 3.8e-05,
+      "loss": 0.1243,
+      "step": 20
+    },
+    {
+      "epoch": 0.057803468208092484,
+      "grad_norm": 0.7696737051010132,
+      "learning_rate": 5.8e-05,
+      "loss": 0.1138,
+      "step": 30
+    },
+    {
+      "epoch": 0.07707129094412331,
+      "grad_norm": 0.524152398109436,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 0.1041,
+      "step": 40
+    },
+    {
+      "epoch": 0.09633911368015415,
+      "grad_norm": 0.4883207380771637,
+      "learning_rate": 9.8e-05,
+      "loss": 0.0938,
+      "step": 50
+    },
+    {
+      "epoch": 0.11560693641618497,
+      "grad_norm": 0.6807201504707336,
+      "learning_rate": 9.997785653888835e-05,
+      "loss": 0.1036,
+      "step": 60
+    },
+    {
+      "epoch": 0.1348747591522158,
+      "grad_norm": 0.5879040360450745,
+      "learning_rate": 9.990133642141359e-05,
+      "loss": 0.0913,
+      "step": 70
+    },
+    {
+      "epoch": 0.15414258188824662,
+      "grad_norm": 0.2884378135204315,
+      "learning_rate": 9.977024992520602e-05,
+      "loss": 0.0937,
+      "step": 80
+    },
+    {
+      "epoch": 0.17341040462427745,
+      "grad_norm": 0.31237179040908813,
+      "learning_rate": 9.95847403914247e-05,
+      "loss": 0.0818,
+      "step": 90
+    },
+    {
+      "epoch": 0.1926782273603083,
+      "grad_norm": 0.5337116718292236,
+      "learning_rate": 9.934501067202117e-05,
+      "loss": 0.0871,
+      "step": 100
+    },
+    {
+      "epoch": 0.2119460500963391,
+      "grad_norm": 0.35781776905059814,
+      "learning_rate": 9.905132290792394e-05,
+      "loss": 0.0813,
+      "step": 110
+    },
+    {
+      "epoch": 0.23121387283236994,
+      "grad_norm": 0.41506218910217285,
+      "learning_rate": 9.870399824239117e-05,
+      "loss": 0.0807,
+      "step": 120
+    },
+    {
+      "epoch": 0.2504816955684008,
+      "grad_norm": 0.2967628836631775,
+      "learning_rate": 9.830341646984521e-05,
+      "loss": 0.0824,
+      "step": 130
+    },
+    {
+      "epoch": 0.2697495183044316,
+      "grad_norm": 0.31782740354537964,
+      "learning_rate": 9.785001562057309e-05,
+      "loss": 0.0756,
+      "step": 140
+    },
+    {
+      "epoch": 0.28901734104046245,
+      "grad_norm": 0.34691065549850464,
+      "learning_rate": 9.734429148174675e-05,
+      "loss": 0.084,
+      "step": 150
+    },
+    {
+      "epoch": 0.30828516377649323,
+      "grad_norm": 0.48853012919425964,
+      "learning_rate": 9.6786797055287e-05,
+      "loss": 0.0832,
+      "step": 160
+    },
+    {
+      "epoch": 0.32755298651252407,
+      "grad_norm": 0.39761826395988464,
+      "learning_rate": 9.617814195316411e-05,
+      "loss": 0.0773,
+      "step": 170
+    },
+    {
+      "epoch": 0.3468208092485549,
+      "grad_norm": 0.41890355944633484,
+      "learning_rate": 9.551899173079607e-05,
+      "loss": 0.072,
+      "step": 180
+    },
+    {
+      "epoch": 0.36608863198458574,
+      "grad_norm": 0.5497244596481323,
+      "learning_rate": 9.481006715927351e-05,
+      "loss": 0.0733,
+      "step": 190
+    },
+    {
+      "epoch": 0.3853564547206166,
+      "grad_norm": 0.28932520747184753,
+      "learning_rate": 9.405214343720707e-05,
+      "loss": 0.0814,
+      "step": 200
+    },
+    {
+      "epoch": 0.4046242774566474,
+      "grad_norm": 0.5443249344825745,
+      "learning_rate": 9.32460493430591e-05,
+      "loss": 0.0789,
+      "step": 210
+    },
+    {
+      "epoch": 0.4238921001926782,
+      "grad_norm": 0.5669806003570557,
+      "learning_rate": 9.239266632888659e-05,
+      "loss": 0.0717,
+      "step": 220
+    },
+    {
+      "epoch": 0.44315992292870904,
+      "grad_norm": 0.3382496237754822,
+      "learning_rate": 9.14929275564863e-05,
+      "loss": 0.0714,
+      "step": 230
+    },
+    {
+      "epoch": 0.4624277456647399,
+      "grad_norm": 0.47498270869255066,
+      "learning_rate": 9.0547816876996e-05,
+      "loss": 0.0691,
+      "step": 240
+    },
+    {
+      "epoch": 0.4816955684007707,
+      "grad_norm": 0.4482809007167816,
+      "learning_rate": 8.955836775506776e-05,
+      "loss": 0.0694,
+      "step": 250
+    },
+    {
+      "epoch": 0.5009633911368016,
+      "grad_norm": 0.5501825213432312,
+      "learning_rate": 8.852566213878947e-05,
+      "loss": 0.0686,
+      "step": 260
+    },
+    {
+      "epoch": 0.5202312138728323,
+      "grad_norm": 0.3987138867378235,
+      "learning_rate": 8.745082927659047e-05,
+      "loss": 0.078,
+      "step": 270
+    },
+    {
+      "epoch": 0.5394990366088632,
+      "grad_norm": 0.28167715668678284,
+      "learning_rate": 8.633504448242505e-05,
+      "loss": 0.0758,
+      "step": 280
+    },
+    {
+      "epoch": 0.558766859344894,
+      "grad_norm": 0.24536319077014923,
+      "learning_rate": 8.517952785058385e-05,
+      "loss": 0.0747,
+      "step": 290
+    },
+    {
+      "epoch": 0.5780346820809249,
+      "grad_norm": 0.2790259122848511,
+      "learning_rate": 8.398554292153866e-05,
+      "loss": 0.0681,
+      "step": 300
+    },
+    {
+      "epoch": 0.5973025048169557,
+      "grad_norm": 0.4405696988105774,
+      "learning_rate": 8.275439530027948e-05,
+      "loss": 0.0713,
+      "step": 310
+    },
+    {
+      "epoch": 0.6165703275529865,
+      "grad_norm": 0.3930000066757202,
+      "learning_rate": 8.148743122865463e-05,
+      "loss": 0.0633,
+      "step": 320
+    },
+    {
+      "epoch": 0.6358381502890174,
+      "grad_norm": 0.39871180057525635,
+      "learning_rate": 8.018603611327504e-05,
+      "loss": 0.0735,
+      "step": 330
+    },
+    {
+      "epoch": 0.6551059730250481,
+      "grad_norm": 0.43595343828201294,
+      "learning_rate": 7.88516330105925e-05,
+      "loss": 0.0651,
+      "step": 340
+    },
+    {
+      "epoch": 0.674373795761079,
+      "grad_norm": 0.3049931824207306,
+      "learning_rate": 7.748568107080832e-05,
+      "loss": 0.0641,
+      "step": 350
+    },
+    {
+      "epoch": 0.6936416184971098,
+      "grad_norm": 0.2661554515361786,
+      "learning_rate": 7.608967394231387e-05,
+      "loss": 0.0615,
+      "step": 360
+    },
+    {
+      "epoch": 0.7129094412331407,
+      "grad_norm": 0.23182271420955658,
+      "learning_rate": 7.466513813840825e-05,
+      "loss": 0.0641,
+      "step": 370
+    },
+    {
+      "epoch": 0.7321772639691715,
+      "grad_norm": 0.36455002427101135,
+      "learning_rate": 7.32136313680782e-05,
+      "loss": 0.0694,
+      "step": 380
+    },
+    {
+      "epoch": 0.7514450867052023,
+      "grad_norm": 0.37729915976524353,
+      "learning_rate": 7.173674083266624e-05,
+      "loss": 0.0657,
+      "step": 390
+    },
+    {
+      "epoch": 0.7707129094412332,
+      "grad_norm": 0.3024880290031433,
+      "learning_rate": 7.023608149028937e-05,
+      "loss": 0.0595,
+      "step": 400
+    },
+    {
+      "epoch": 0.789980732177264,
+      "grad_norm": 0.31104758381843567,
+      "learning_rate": 6.871329428990602e-05,
+      "loss": 0.0681,
+      "step": 410
+    },
+    {
+      "epoch": 0.8092485549132948,
+      "grad_norm": 0.350875586271286,
+      "learning_rate": 6.71700443769625e-05,
+      "loss": 0.061,
+      "step": 420
+    },
+    {
+      "epoch": 0.8285163776493256,
+      "grad_norm": 0.2729007303714752,
+      "learning_rate": 6.56080192725808e-05,
+      "loss": 0.0666,
+      "step": 430
+    },
+    {
+      "epoch": 0.8477842003853564,
+      "grad_norm": 0.29519274830818176,
+      "learning_rate": 6.402892702827916e-05,
+      "loss": 0.0617,
+      "step": 440
+    },
+    {
+      "epoch": 0.8670520231213873,
+      "grad_norm": 0.41105949878692627,
+      "learning_rate": 6.243449435824276e-05,
+      "loss": 0.0635,
+      "step": 450
+    },
+    {
+      "epoch": 0.8863198458574181,
+      "grad_norm": 0.39932650327682495,
+      "learning_rate": 6.0826464751186994e-05,
+      "loss": 0.0668,
+      "step": 460
+    },
+    {
+      "epoch": 0.905587668593449,
+      "grad_norm": 0.36617910861968994,
+      "learning_rate": 5.9206596563878357e-05,
+      "loss": 0.0642,
+      "step": 470
+    },
+    {
+      "epoch": 0.9248554913294798,
+      "grad_norm": 0.3059275150299072,
+      "learning_rate": 5.757666109839702e-05,
+      "loss": 0.0639,
+      "step": 480
+    },
+    {
+      "epoch": 0.9441233140655106,
+      "grad_norm": 0.3001484274864197,
+      "learning_rate": 5.5938440665244006e-05,
+      "loss": 0.0581,
+      "step": 490
+    },
+    {
+      "epoch": 0.9633911368015414,
+      "grad_norm": 0.3571004569530487,
+      "learning_rate": 5.4293726634410855e-05,
+      "loss": 0.0558,
+      "step": 500
+    },
+    {
+      "epoch": 0.9826589595375722,
+      "grad_norm": 0.3813907206058502,
+      "learning_rate": 5.264431747654284e-05,
+      "loss": 0.0618,
+      "step": 510
+    },
+    {
+      "epoch": 1.001926782273603,
+      "grad_norm": 0.29777514934539795,
+      "learning_rate": 5.0992016796337686e-05,
+      "loss": 0.0578,
+      "step": 520
+    },
+    {
+      "epoch": 1.0211946050096339,
+      "grad_norm": 0.28990840911865234,
+      "learning_rate": 4.93386313603304e-05,
+      "loss": 0.0613,
+      "step": 530
+    },
+    {
+      "epoch": 1.0404624277456647,
+      "grad_norm": 0.43356871604919434,
+      "learning_rate": 4.7685969121220456e-05,
+      "loss": 0.057,
+      "step": 540
+    },
+    {
+      "epoch": 1.0597302504816957,
+      "grad_norm": 0.2468729466199875,
+      "learning_rate": 4.60358372409022e-05,
+      "loss": 0.053,
+      "step": 550
+    },
+    {
+      "epoch": 1.0789980732177264,
+      "grad_norm": 0.33707353472709656,
+      "learning_rate": 4.439004011435979e-05,
+      "loss": 0.0523,
+      "step": 560
+    },
+    {
+      "epoch": 1.0982658959537572,
+      "grad_norm": 0.3425613343715668,
+      "learning_rate": 4.275037739658771e-05,
+      "loss": 0.0533,
+      "step": 570
+    },
+    {
+      "epoch": 1.117533718689788,
+      "grad_norm": 0.21212391555309296,
+      "learning_rate": 4.111864203469457e-05,
+      "loss": 0.0506,
+      "step": 580
+    },
+    {
+      "epoch": 1.1368015414258188,
+      "grad_norm": 0.2778816223144531,
+      "learning_rate": 3.949661830734172e-05,
+      "loss": 0.0575,
+      "step": 590
+    },
+    {
+      "epoch": 1.1560693641618498,
+      "grad_norm": 0.2917283773422241,
+      "learning_rate": 3.788607987366069e-05,
+      "loss": 0.0571,
+      "step": 600
+    },
+    {
+      "epoch": 1.1753371868978806,
+      "grad_norm": 0.27486225962638855,
+      "learning_rate": 3.628878783378302e-05,
+      "loss": 0.056,
+      "step": 610
+    },
+    {
+      "epoch": 1.1946050096339114,
+      "grad_norm": 0.40446701645851135,
+      "learning_rate": 3.470648880310313e-05,
+      "loss": 0.0529,
+      "step": 620
+    },
+    {
+      "epoch": 1.2138728323699421,
+      "grad_norm": 0.25892701745033264,
+      "learning_rate": 3.3140913002379995e-05,
+      "loss": 0.0504,
+      "step": 630
+    },
+    {
+      "epoch": 1.2331406551059731,
+      "grad_norm": 0.26697343587875366,
+      "learning_rate": 3.1593772365766105e-05,
+      "loss": 0.0509,
+      "step": 640
+    },
+    {
+      "epoch": 1.252408477842004,
+      "grad_norm": 0.2691023051738739,
+      "learning_rate": 3.006675866883275e-05,
+      "loss": 0.0576,
+      "step": 650
+    },
+    {
+      "epoch": 1.2716763005780347,
+      "grad_norm": 0.2726045250892639,
+      "learning_rate": 2.8561541678638142e-05,
+      "loss": 0.051,
+      "step": 660
+    },
+    {
+      "epoch": 1.2909441233140655,
+      "grad_norm": 0.3852327764034271,
+      "learning_rate": 2.707976732786166e-05,
+      "loss": 0.0535,
+      "step": 670
+    },
+    {
+      "epoch": 1.3102119460500963,
+      "grad_norm": 0.2920765280723572,
+      "learning_rate": 2.562305591500069e-05,
+      "loss": 0.0492,
+      "step": 680
+    },
+    {
+      "epoch": 1.3294797687861273,
+      "grad_norm": 0.1913856863975525,
+      "learning_rate": 2.419300033259798e-05,
+      "loss": 0.0507,
+      "step": 690
+    },
+    {
+      "epoch": 1.348747591522158,
+      "grad_norm": 0.29561716318130493,
+      "learning_rate": 2.279116432543705e-05,
+      "loss": 0.0499,
+      "step": 700
+    },
+    {
+      "epoch": 1.3680154142581888,
+      "grad_norm": 0.4290018379688263,
+      "learning_rate": 2.1419080780610123e-05,
+      "loss": 0.0571,
+      "step": 710
+    },
+    {
+      "epoch": 1.3872832369942196,
+      "grad_norm": 0.2543991506099701,
+      "learning_rate": 2.0078250051328784e-05,
+      "loss": 0.0524,
+      "step": 720
+    },
+    {
+      "epoch": 1.4065510597302504,
+      "grad_norm": 0.2487826943397522,
+      "learning_rate": 1.877013831630961e-05,
+      "loss": 0.0539,
+      "step": 730
+    },
+    {
+      "epoch": 1.4258188824662814,
+      "grad_norm": 0.3550814390182495,
+      "learning_rate": 1.749617597652934e-05,
+      "loss": 0.0436,
+      "step": 740
+    },
+    {
+      "epoch": 1.4450867052023122,
+      "grad_norm": 0.2873481810092926,
+      "learning_rate": 1.62577560911024e-05,
+      "loss": 0.0463,
+      "step": 750
+    },
+    {
+      "epoch": 1.464354527938343,
+      "grad_norm": 0.22322338819503784,
+      "learning_rate": 1.5056232853991209e-05,
+      "loss": 0.0427,
+      "step": 760
+    },
+    {
+      "epoch": 1.4836223506743738,
+      "grad_norm": 0.23233197629451752,
+      "learning_rate": 1.389292011321498e-05,
+      "loss": 0.0427,
+      "step": 770
+    },
+    {
+      "epoch": 1.5028901734104045,
+      "grad_norm": 0.25211092829704285,
+      "learning_rate": 1.2769089934176126e-05,
+      "loss": 0.0456,
+      "step": 780
+    },
+    {
+      "epoch": 1.5221579961464355,
+      "grad_norm": 0.19579973816871643,
+      "learning_rate": 1.1685971208675539e-05,
+      "loss": 0.0447,
+      "step": 790
+    },
+    {
+      "epoch": 1.5414258188824663,
+      "grad_norm": 0.2609504461288452,
+      "learning_rate": 1.0644748311137376e-05,
+      "loss": 0.0467,
+      "step": 800
+    },
+    {
+      "epoch": 1.560693641618497,
+      "grad_norm": 0.23123040795326233,
+      "learning_rate": 9.646559803512994e-06,
+      "loss": 0.0505,
+      "step": 810
+    },
+    {
+      "epoch": 1.579961464354528,
+      "grad_norm": 0.1902703195810318,
+      "learning_rate": 8.692497190280224e-06,
+      "loss": 0.0463,
+      "step": 820
+    },
+    {
+      "epoch": 1.5992292870905587,
+      "grad_norm": 0.30633774399757385,
+      "learning_rate": 7.783603724899257e-06,
+      "loss": 0.0465,
+      "step": 830
+    },
+    {
+      "epoch": 1.6184971098265897,
+      "grad_norm": 0.24036064743995667,
+      "learning_rate": 6.92087326903022e-06,
+      "loss": 0.0435,
+      "step": 840
+    },
+    {
+      "epoch": 1.6377649325626205,
+      "grad_norm": 0.20757320523262024,
+      "learning_rate": 6.1052492057601275e-06,
+      "loss": 0.0505,
+      "step": 850
+    },
+    {
+      "epoch": 1.6570327552986512,
+      "grad_norm": 0.3485427796840668,
+      "learning_rate": 5.337623408027293e-06,
+      "loss": 0.0441,
+      "step": 860
+    },
+    {
+      "epoch": 1.6763005780346822,
+      "grad_norm": 0.235531747341156,
+      "learning_rate": 4.618835263371396e-06,
+      "loss": 0.0464,
+      "step": 870
+    },
+    {
+      "epoch": 1.6955684007707128,
+      "grad_norm": 0.19758078455924988,
+      "learning_rate": 3.949670756075447e-06,
+      "loss": 0.0413,
+      "step": 880
+    },
+    {
+      "epoch": 1.7148362235067438,
+      "grad_norm": 0.21413107216358185,
+      "learning_rate": 3.3308616077036115e-06,
+      "loss": 0.0412,
+      "step": 890
+    },
+    {
+      "epoch": 1.7341040462427746,
+      "grad_norm": 0.26742419600486755,
+      "learning_rate": 2.7630844769743757e-06,
+      "loss": 0.0454,
+      "step": 900
+    },
+    {
+      "epoch": 1.7533718689788054,
+      "grad_norm": 0.22228221595287323,
+      "learning_rate": 2.2469602198441573e-06,
+      "loss": 0.0392,
+      "step": 910
+    },
+    {
+      "epoch": 1.7726396917148364,
+      "grad_norm": 0.2834679186344147,
+      "learning_rate": 1.7830532106104747e-06,
+      "loss": 0.0436,
+      "step": 920
+    },
+    {
+      "epoch": 1.791907514450867,
+      "grad_norm": 0.260470449924469,
+      "learning_rate": 1.3718707247769135e-06,
+      "loss": 0.0424,
+      "step": 930
+    },
+    {
+      "epoch": 1.811175337186898,
+      "grad_norm": 0.2204512655735016,
+      "learning_rate": 1.0138623843548078e-06,
+      "loss": 0.0393,
+      "step": 940
+    },
+    {
+      "epoch": 1.8304431599229287,
+      "grad_norm": 0.23786160349845886,
+      "learning_rate": 7.094196662081831e-07,
+      "loss": 0.0472,
+      "step": 950
+    },
+    {
+      "epoch": 1.8497109826589595,
+      "grad_norm": 0.18833257257938385,
+      "learning_rate": 4.5887547397955864e-07,
+      "loss": 0.0416,
+      "step": 960
+    },
+    {
+      "epoch": 1.8689788053949905,
+      "grad_norm": 0.20266398787498474,
+      "learning_rate": 2.625037740646763e-07,
+      "loss": 0.0426,
+      "step": 970
+    },
+    {
+      "epoch": 1.888246628131021,
+      "grad_norm": 0.2135821133852005,
+      "learning_rate": 1.2051929603428825e-07,
+      "loss": 0.0445,
+      "step": 980
+    },
+    {
+      "epoch": 1.907514450867052,
+      "grad_norm": 0.2030361145734787,
+      "learning_rate": 3.3077297830541584e-08,
+      "loss": 0.045,
+      "step": 990
+    },
+    {
+      "epoch": 1.9267822736030829,
+      "grad_norm": 0.31092095375061035,
+      "learning_rate": 2.7339599464326627e-10,
+      "loss": 0.0434,
+      "step": 1000
+    },
+    {
+      "epoch": 1.9267822736030829,
+      "step": 1000,
+      "total_flos": 0.0,
+      "train_loss": 0.06433489021658897,
+      "train_runtime": 935.0208,
+      "train_samples_per_second": 34.224,
+      "train_steps_per_second": 1.069
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7333e1af2bdf6aa8ec2a60059ce66a628f29eb126dae4e4e6a1b469b2b0715e
+size 5304