ShuaiYang03 committed on Aug 12

Commit

63c1948

verified ·

1 Parent(s): e4cd682

Upload folder using huggingface_hub

Browse files

Files changed (26) hide show

checkpoint-30000/config.json +64 -0
checkpoint-30000/experiment_cfg/metadata.json +447 -0
checkpoint-30000/model-00001-of-00002.safetensors +3 -0
checkpoint-30000/model-00002-of-00002.safetensors +3 -0
checkpoint-30000/model.safetensors.index.json +0 -0
checkpoint-30000/optimizer.pt +3 -0
checkpoint-30000/rng_state_0.pth +3 -0
checkpoint-30000/rng_state_1.pth +3 -0
checkpoint-30000/rng_state_2.pth +3 -0
checkpoint-30000/rng_state_3.pth +3 -0
checkpoint-30000/rng_state_4.pth +3 -0
checkpoint-30000/rng_state_5.pth +3 -0
checkpoint-30000/rng_state_6.pth +3 -0
checkpoint-30000/rng_state_7.pth +3 -0
checkpoint-30000/scheduler.pt +3 -0
checkpoint-30000/trainer_state.json +0 -0
config.json +64 -0
experiment_cfg/metadata.json +447 -0
model-00001-of-00002.safetensors +3 -0
model-00002-of-00002.safetensors +3 -0
model.safetensors.index.json +0 -0
results.csv +2 -0
results.md +29 -0
total.metrics +230 -0
trainer_state.json +0 -0
training_args.bin +3 -0

checkpoint-30000/config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

checkpoint-30000/experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,447 @@

+{
+    "oxe": {
+        "statistics": {
+            "state": {
+                "x": {
+                    "max": [
+                        1.0534898042678833
+                    ],
+                    "min": [
+                        -0.4436439275741577
+                    ],
+                    "mean": [
+                        0.5598947405815125
+                    ],
+                    "std": [
+                        0.12432649731636047
+                    ],
+                    "q01": [
+                        0.32481380939483645
+                    ],
+                    "q99": [
+                        0.8750156319141384
+                    ]
+                },
+                "y": {
+                    "max": [
+                        0.48018959164619446
+                    ],
+                    "min": [
+                        -0.9970501065254211
+                    ],
+                    "mean": [
+                        -0.08334138244390488
+                    ],
+                    "std": [
+                        0.11558396369218826
+                    ],
+                    "q01": [
+                        -0.28334290891885755
+                    ],
+                    "q99": [
+                        0.21247054174542404
+                    ]
+                },
+                "z": {
+                    "max": [
+                        1.6896663904190063
+                    ],
+                    "min": [
+                        -0.006579156965017319
+                    ],
+                    "mean": [
+                        0.7770950198173523
+                    ],
+                    "std": [
+                        0.24595585465431213
+                    ],
+                    "q01": [
+                        0.14107070609927178
+                    ],
+                    "q99": [
+                        1.0727112340927123
+                    ]
+                },
+                "rx": {
+                    "max": [
+                        0.9999993443489075
+                    ],
+                    "min": [
+                        -0.8643477559089661
+                    ],
+                    "mean": [
+                        -0.24804554879665375
+                    ],
+                    "std": [
+                        0.5126774907112122
+                    ],
+                    "q01": [
+                        -0.686474204659462
+                    ],
+                    "q99": [
+                        0.9377871316671368
+                    ]
+                },
+                "ry": {
+                    "max": [
+                        0.9999874830245972
+                    ],
+                    "min": [
+                        -0.7079970240592957
+                    ],
+                    "mean": [
+                        0.4951382279396057
+                    ],
+                    "std": [
+                        0.5218117237091064
+                    ],
+                    "q01": [
+                        -0.6808923494815826
+                    ],
+                    "q99": [
+                        0.9563051050901409
+                    ]
+                },
+                "rz": {
+                    "max": [
+                        0.9554369449615479
+                    ],
+                    "min": [
+                        -0.7688722014427185
+                    ],
+                    "mean": [
+                        0.09266126900911331
+                    ],
+                    "std": [
+                        0.16630452871322632
+                    ],
+                    "q01": [
+                        -0.36045596331357954
+                    ],
+                    "q99": [
+                        0.45990042358636823
+                    ]
+                },
+                "rw": {
+                    "max": [
+                        0.9914546012878418
+                    ],
+                    "min": [
+                        -0.4999994933605194
+                    ],
+                    "mean": [
+                        0.2097489982843399
+                    ],
+                    "std": [
+                        0.2754714787006378
+                    ],
+                    "q01": [
+                        -0.454380963742733
+                    ],
+                    "q99": [
+                        0.7216041100025177
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        1.0
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        0.4261345863342285
+                    ],
+                    "std": [
+                        0.4554642140865326
+                    ],
+                    "q01": [
+                        0.0
+                    ],
+                    "q99": [
+                        1.0
+                    ]
+                }
+            },
+            "action": {
+                "x": {
+                    "max": [
+                        2.9984593391418457
+                    ],
+                    "min": [
+                        -2.0204520225524902
+                    ],
+                    "mean": [
+                        0.00698750140145421
+                    ],
+                    "std": [
+                        0.06921227276325226
+                    ],
+                    "q01": [
+                        -0.22453527510166169
+                    ],
+                    "q99": [
+                        0.17824687153100965
+                    ]
+                },
+                "y": {
+                    "max": [
+                        22.09052848815918
+                    ],
+                    "min": [
+                        -5.497899532318115
+                    ],
+                    "mean": [
+                        0.006265923380851746
+                    ],
+                    "std": [
+                        0.059654854238033295
+                    ],
+                    "q01": [
+                        -0.14820013284683228
+                    ],
+                    "q99": [
+                        0.14938379630446405
+                    ]
+                },
+                "z": {
+                    "max": [
+                        2.7507524490356445
+                    ],
+                    "min": [
+                        -2.031663417816162
+                    ],
+                    "mean": [
+                        -0.012625134550035
+                    ],
+                    "std": [
+                        0.07353131473064423
+                    ],
+                    "q01": [
+                        -0.231589707583189
+                    ],
+                    "q99": [
+                        0.21842354819178575
+                    ]
+                },
+                "roll": {
+                    "max": [
+                        1.570636510848999
+                    ],
+                    "min": [
+                        -1.569917917251587
+                    ],
+                    "mean": [
+                        0.04333191365003586
+                    ],
+                    "std": [
+                        0.1561005562543869
+                    ],
+                    "q01": [
+                        -0.3517994859814644
+                    ],
+                    "q99": [
+                        0.5892666035890578
+                    ]
+                },
+                "pitch": {
+                    "max": [
+                        1.5321086645126343
+                    ],
+                    "min": [
+                        -1.569892168045044
+                    ],
+                    "mean": [
+                        -0.005756167229264975
+                    ],
+                    "std": [
+                        0.13164213299751282
+                    ],
+                    "q01": [
+                        -0.4193011274933815
+                    ],
+                    "q99": [
+                        0.35272657424211445
+                    ]
+                },
+                "yaw": {
+                    "max": [
+                        1.5691522359848022
+                    ],
+                    "min": [
+                        -1.570419430732727
+                    ],
+                    "mean": [
+                        0.0009130979306064546
+                    ],
+                    "std": [
+                        0.1459326595067978
+                    ],
+                    "q01": [
+                        -0.43643461108207704
+                    ],
+                    "q99": [
+                        0.44796681255102094
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        1.0
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        0.5354204773902893
+                    ],
+                    "std": [
+                        0.4971078634262085
+                    ],
+                    "q01": [
+                        0.0
+                    ],
+                    "q99": [
+                        1.0
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "image": {
+                    "resolution": [
+                        320,
+                        256
+                    ],
+                    "channels": 3,
+                    "fps": 3.0
+                }
+            },
+            "state": {
+                "x": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "y": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "z": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "rx": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "ry": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "rz": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "rw": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "x": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "y": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "z": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "roll": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "pitch": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "yaw": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "oxe"
+    }
+}

checkpoint-30000/model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26fe062d45063aca8af2a9065489b556d12447a5678fbb709477da9773573e1f
+size 4999367032

checkpoint-30000/model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98c6531d73fdf296abc46ea5b9b5e3ff839797a6e896f6f4d1663aae4e92520f
+size 2586508600

checkpoint-30000/model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-30000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03d07466c3cc0826a142ec542fb6a35ef345b91803d7369246e28685768dbc79
+size 8550325978

checkpoint-30000/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c37ca53876599968a3004f603ba2faca9347a0627be93e4323aefe99dec05d6b
+size 15984

checkpoint-30000/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f34de092836aedd7bd6c444bf37502a196d1bbc81968a92c454464c1f9891fb
+size 15984

checkpoint-30000/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdcc4ede83f102615569832e99536a097f90fd15c551762fb3fff12b74267ed7
+size 15984

checkpoint-30000/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6cc08751e7421295803ac7559da75e09874760dddab8652232d5c730ddbc670
+size 15984

checkpoint-30000/rng_state_4.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e887e917e9c9f904cd79b4199c446127455a6329bb32fdf0e21374cf9878ffe1
+size 15984

checkpoint-30000/rng_state_5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2272c665a91578e0ed4a4f9afe0a9c38d380abe4c194368e52545c5ab5468be
+size 15984

checkpoint-30000/rng_state_6.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51825a147d0c52ef818cfee360b1a80a2c2d0a43d89d4a2fe5a85fb086db89b0
+size 15984

checkpoint-30000/rng_state_7.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69623bd313d28ef4892d781d8040f9f9c3d159adc858bd82e4d4b99fdf1299a4
+size 15984

checkpoint-30000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ac42b4efebda871e7035b931b3f1fd2282cf31a9d4249ad956a15227d00969cb
+size 1064

checkpoint-30000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "action_dim": 32,
+  "action_head_cfg": {
+    "action_dim": 32,
+    "action_horizon": 16,
+    "add_pos_embed": true,
+    "backbone_embedding_dim": 2048,
+    "diffusion_model_cfg": {
+      "attention_head_dim": 48,
+      "cross_attention_dim": 2048,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "interleave_self_attention": true,
+      "norm_type": "ada_norm",
+      "num_attention_heads": 32,
+      "num_layers": 16,
+      "output_dim": 1024,
+      "positional_embeddings": null
+    },
+    "hidden_size": 1024,
+    "input_embedding_dim": 1536,
+    "max_action_dim": 32,
+    "max_state_dim": 64,
+    "model_dtype": "float32",
+    "noise_beta_alpha": 1.5,
+    "noise_beta_beta": 1.0,
+    "noise_s": 0.999,
+    "num_inference_timesteps": 4,
+    "num_target_vision_tokens": 32,
+    "num_timestep_buckets": 1000,
+    "tune_diffusion_model": true,
+    "tune_projector": true,
+    "use_vlln": true,
+    "vl_self_attention_cfg": {
+      "attention_head_dim": 64,
+      "dropout": 0.2,
+      "final_dropout": true,
+      "num_attention_heads": 32,
+      "num_layers": 4,
+      "positional_embeddings": null
+    }
+  },
+  "action_horizon": 16,
+  "architectures": [
+    "GR00T_N1_5"
+  ],
+  "attn_implementation": null,
+  "backbone_cfg": {
+    "eagle_path": "NVEagle/eagle_er-qwen3_1_7B-Siglip2_400M_stage1_5_128gpu_er_v7_1mlp_nops",
+    "load_bf16": false,
+    "project_to_dim": null,
+    "reproject_vision": false,
+    "select_layer": 12,
+    "tune_llm": false,
+    "tune_visual": true,
+    "use_flash_attention": true
+  },
+  "compute_dtype": "bfloat16",
+  "hidden_size": 2048,
+  "model_dtype": "float32",
+  "model_type": "gr00t_n1_5",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3"
+}

experiment_cfg/metadata.json ADDED Viewed

	@@ -0,0 +1,447 @@

+{
+    "oxe": {
+        "statistics": {
+            "state": {
+                "x": {
+                    "max": [
+                        1.0534898042678833
+                    ],
+                    "min": [
+                        -0.4436439275741577
+                    ],
+                    "mean": [
+                        0.5598947405815125
+                    ],
+                    "std": [
+                        0.12432649731636047
+                    ],
+                    "q01": [
+                        0.32481380939483645
+                    ],
+                    "q99": [
+                        0.8750156319141384
+                    ]
+                },
+                "y": {
+                    "max": [
+                        0.48018959164619446
+                    ],
+                    "min": [
+                        -0.9970501065254211
+                    ],
+                    "mean": [
+                        -0.08334138244390488
+                    ],
+                    "std": [
+                        0.11558396369218826
+                    ],
+                    "q01": [
+                        -0.28334290891885755
+                    ],
+                    "q99": [
+                        0.21247054174542404
+                    ]
+                },
+                "z": {
+                    "max": [
+                        1.6896663904190063
+                    ],
+                    "min": [
+                        -0.006579156965017319
+                    ],
+                    "mean": [
+                        0.7770950198173523
+                    ],
+                    "std": [
+                        0.24595585465431213
+                    ],
+                    "q01": [
+                        0.14107070609927178
+                    ],
+                    "q99": [
+                        1.0727112340927123
+                    ]
+                },
+                "rx": {
+                    "max": [
+                        0.9999993443489075
+                    ],
+                    "min": [
+                        -0.8643477559089661
+                    ],
+                    "mean": [
+                        -0.24804554879665375
+                    ],
+                    "std": [
+                        0.5126774907112122
+                    ],
+                    "q01": [
+                        -0.686474204659462
+                    ],
+                    "q99": [
+                        0.9377871316671368
+                    ]
+                },
+                "ry": {
+                    "max": [
+                        0.9999874830245972
+                    ],
+                    "min": [
+                        -0.7079970240592957
+                    ],
+                    "mean": [
+                        0.4951382279396057
+                    ],
+                    "std": [
+                        0.5218117237091064
+                    ],
+                    "q01": [
+                        -0.6808923494815826
+                    ],
+                    "q99": [
+                        0.9563051050901409
+                    ]
+                },
+                "rz": {
+                    "max": [
+                        0.9554369449615479
+                    ],
+                    "min": [
+                        -0.7688722014427185
+                    ],
+                    "mean": [
+                        0.09266126900911331
+                    ],
+                    "std": [
+                        0.16630452871322632
+                    ],
+                    "q01": [
+                        -0.36045596331357954
+                    ],
+                    "q99": [
+                        0.45990042358636823
+                    ]
+                },
+                "rw": {
+                    "max": [
+                        0.9914546012878418
+                    ],
+                    "min": [
+                        -0.4999994933605194
+                    ],
+                    "mean": [
+                        0.2097489982843399
+                    ],
+                    "std": [
+                        0.2754714787006378
+                    ],
+                    "q01": [
+                        -0.454380963742733
+                    ],
+                    "q99": [
+                        0.7216041100025177
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        1.0
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        0.4261345863342285
+                    ],
+                    "std": [
+                        0.4554642140865326
+                    ],
+                    "q01": [
+                        0.0
+                    ],
+                    "q99": [
+                        1.0
+                    ]
+                }
+            },
+            "action": {
+                "x": {
+                    "max": [
+                        2.9984593391418457
+                    ],
+                    "min": [
+                        -2.0204520225524902
+                    ],
+                    "mean": [
+                        0.00698750140145421
+                    ],
+                    "std": [
+                        0.06921227276325226
+                    ],
+                    "q01": [
+                        -0.22453527510166169
+                    ],
+                    "q99": [
+                        0.17824687153100965
+                    ]
+                },
+                "y": {
+                    "max": [
+                        22.09052848815918
+                    ],
+                    "min": [
+                        -5.497899532318115
+                    ],
+                    "mean": [
+                        0.006265923380851746
+                    ],
+                    "std": [
+                        0.059654854238033295
+                    ],
+                    "q01": [
+                        -0.14820013284683228
+                    ],
+                    "q99": [
+                        0.14938379630446405
+                    ]
+                },
+                "z": {
+                    "max": [
+                        2.7507524490356445
+                    ],
+                    "min": [
+                        -2.031663417816162
+                    ],
+                    "mean": [
+                        -0.012625134550035
+                    ],
+                    "std": [
+                        0.07353131473064423
+                    ],
+                    "q01": [
+                        -0.231589707583189
+                    ],
+                    "q99": [
+                        0.21842354819178575
+                    ]
+                },
+                "roll": {
+                    "max": [
+                        1.570636510848999
+                    ],
+                    "min": [
+                        -1.569917917251587
+                    ],
+                    "mean": [
+                        0.04333191365003586
+                    ],
+                    "std": [
+                        0.1561005562543869
+                    ],
+                    "q01": [
+                        -0.3517994859814644
+                    ],
+                    "q99": [
+                        0.5892666035890578
+                    ]
+                },
+                "pitch": {
+                    "max": [
+                        1.5321086645126343
+                    ],
+                    "min": [
+                        -1.569892168045044
+                    ],
+                    "mean": [
+                        -0.005756167229264975
+                    ],
+                    "std": [
+                        0.13164213299751282
+                    ],
+                    "q01": [
+                        -0.4193011274933815
+                    ],
+                    "q99": [
+                        0.35272657424211445
+                    ]
+                },
+                "yaw": {
+                    "max": [
+                        1.5691522359848022
+                    ],
+                    "min": [
+                        -1.570419430732727
+                    ],
+                    "mean": [
+                        0.0009130979306064546
+                    ],
+                    "std": [
+                        0.1459326595067978
+                    ],
+                    "q01": [
+                        -0.43643461108207704
+                    ],
+                    "q99": [
+                        0.44796681255102094
+                    ]
+                },
+                "gripper": {
+                    "max": [
+                        1.0
+                    ],
+                    "min": [
+                        0.0
+                    ],
+                    "mean": [
+                        0.5354204773902893
+                    ],
+                    "std": [
+                        0.4971078634262085
+                    ],
+                    "q01": [
+                        0.0
+                    ],
+                    "q99": [
+                        1.0
+                    ]
+                }
+            }
+        },
+        "modalities": {
+            "video": {
+                "image": {
+                    "resolution": [
+                        320,
+                        256
+                    ],
+                    "channels": 3,
+                    "fps": 3.0
+                }
+            },
+            "state": {
+                "x": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "y": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "z": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "rx": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "ry": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "rz": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "rw": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            },
+            "action": {
+                "x": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "y": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "z": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "roll": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "pitch": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "yaw": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                },
+                "gripper": {
+                    "absolute": true,
+                    "rotation_type": null,
+                    "shape": [
+                        1
+                    ],
+                    "continuous": true
+                }
+            }
+        },
+        "embodiment_tag": "oxe"
+    }
+}

model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14e54be691d887991fd4d6d3780bc6df0247838fd44e385d15704802ef82596a
+size 4999367032

model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0bdf6ae82a0d1585a06e5c56bc67298f608ecb6a8d0fba2f996d593d2f9ff28c
+size 2586508600

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

results.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ coke_can/matching_avg,coke_can/variant_avg,coke_can/matching/horizontal,coke_can/matching/vertical,coke_can/matching/standing,coke_can/variant/horizontal,coke_can/variant/vertical,coke_can/variant/standing,move_near/variant,move_near/matching,drawer/matching_avg,drawer/variant_avg,drawer/matching/open,drawer/matching/close,drawer/variant/open,drawer/variant/close,put_spoon_on_tablecloth/matching_partial,put_spoon_on_tablecloth/matching_entire,put_carrot_on_plate/matching_partial,put_carrot_on_plate/matching_entire,stack_green_block_on_yellow_block/matching_partial,stack_green_block_on_yellow_block/matching_entire,put_eggplant_in_basket/matching_partial,put_eggplant_in_basket/matching_entire,apple_in_drawer/matching_avg,apple_in_drawer/variant_avg,ckpt_name
2	+ 0.517,0.636,0.470,0.130,0.950,0.711,0.324,0.871,0.510,0.540,0.278,0.132,0.269,0.287,0.085,0.180,,,,,,,,,0.074,0.023,fractal_60K0

results.md ADDED Viewed

	@@ -0,0 +1,29 @@

+|                                                    | 0                    | 1               | 2         | 3      | 4      | 5         | 6          | 7           | 8       | 9       |
+|:---------------------------------------------------|:---------------------|:----------------|:----------|:-------|:-------|:----------|:-----------|:------------|:--------|:--------|
+| coke_can/matching_avg                              | 0.5166666666666666   | 0.857           | 0.71      | 0.567  | 0.787  | 0.17      | nan        | 0.027       | 0.163   | 0.727   |
+| coke_can/variant_avg                               | 0.6355555555555555   | 0.898           | 0.813     | 0.49   | 0.823  | 0.006     | nan        | 0.022       | 0.545   | nan     |
+| coke_can/matching/horizontal                       | 0.47                 | 0.96            | 0.86      | 0.82   | 0.74   | 0.21      | nan        | 0.05        | 0.27    | 0.85    |
+| coke_can/matching/vertical                         | 0.13                 | 0.9             | 0.79      | 0.33   | 0.74   | 0.21      | nan        | 0.0         | 0.03    | 0.43    |
+| coke_can/matching/standing                         | 0.95                 | 0.71            | 0.48      | 0.55   | 0.88   | 0.09      | nan        | 0.03        | 0.19    | 0.9     |
+| coke_can/variant/horizontal                        | 0.7111111111111111   | 0.969           | 0.92      | 0.569  | 0.822  | 0.005     | nan        | 0.022       | 0.711   | nan     |
+| coke_can/variant/vertical                          | 0.3244444444444444   | 0.76            | 0.704     | 0.204  | 0.754  | 0.0       | nan        | 0.013       | 0.271   | nan     |
+| coke_can/variant/standing                          | 0.8711111111111111   | 0.964           | 0.813     | 0.698  | 0.893  | 0.013     | nan        | 0.031       | 0.653   | nan     |
+| move_near/variant                                  | 0.51                 | 0.5             | 0.446     | 0.323  | 0.792  | 0.031     | nan        | 0.04        | 0.477   | nan     |
+| move_near/matching                                 | 0.54                 | 0.442           | 0.354     | 0.317  | 0.779  | 0.042     | nan        | 0.05        | 0.462   | 0.663   |
+| drawer/matching_avg                                | 0.2777777777777778   | 0.73            | 0.565     | 0.597  | 0.25   | 0.227     | nan        | 0.139       | 0.356   | 0.268   |
+| drawer/variant_avg                                 | 0.13227513227513227  | 0.323           | 0.267     | 0.294  | 0.353  | 0.011     | nan        | 0.069       | 0.177   | nan     |
+| drawer/matching/open                               | 0.26851851851851855  | 0.601           | 0.463     | 0.296  | 0.157  | 0.009     | nan        | 0.0         | 0.194   | 0.287   |
+| drawer/matching/close                              | 0.28703703703703703  | 0.861           | 0.667     | 0.891  | 0.343  | 0.444     | nan        | 0.278       | 0.518   | 0.25    |
+| drawer/variant/open                                | 0.08465608465608465  | 0.27            | 0.212     | 0.069  | 0.333  | 0.0       | nan        | 0.005       | 0.158   | nan     |
+| drawer/variant/close                               | 0.1798941798941799   | 0.376           | 0.323     | 0.519  | 0.372  | 0.021     | nan        | 0.132       | 0.195   | nan     |
+| put_spoon_on_tablecloth/matching_partial           | nan                  | nan             | nan       | 0.167  | nan    | 0.347     | 0.778      | nan         | 0.041   | 0.375   |
+| put_spoon_on_tablecloth/matching_entire            | nan                  | nan             | nan       | 0.0    | nan    | 0.125     | 0.472      | nan         | 0.0     | 0.208   |
+| put_carrot_on_plate/matching_partial               | nan                  | nan             | nan       | 0.208  | nan    | 0.528     | 0.278      | nan         | 0.333   | 0.333   |
+| put_carrot_on_plate/matching_entire                | nan                  | nan             | nan       | 0.042  | nan    | 0.083     | 0.097      | nan         | 0.0     | 0.25    |
+| stack_green_block_on_yellow_block/matching_partial | nan                  | nan             | nan       | 0.083  | nan    | 0.319     | 0.403      | nan         | 0.125   | 0.083   |
+| stack_green_block_on_yellow_block/matching_entire  | nan                  | nan             | nan       | 0.0    | nan    | 0.0       | 0.042      | nan         | 0.0     | 0.083   |
+| put_eggplant_in_basket/matching_partial            | nan                  | nan             | nan       | 0.0    | nan    | 0.667     | 0.875      | nan         | 0.083   | 0.0     |
+| put_eggplant_in_basket/matching_entire             | nan                  | nan             | nan       | 0.0    | nan    | 0.431     | 0.569      | nan         | 0.041   | 0.0     |
+| apple_in_drawer/matching_avg                       | 0.07407407407407407  | 0.065           | 0.13      | 0.213  | 0.037  | 0.0       | 0.0        | 0.0         | nan     | 0.361   |
+| apple_in_drawer/variant_avg                        | 0.022857142857142857 | 0.026           | 0.021     | 0.101  | 0.206  | 0.0       | 0.0        | 0.0         | nan     | nan     |
+| ckpt_name                                          | ours                 | RT-1(Converged) | RT-1(15%) | RT-1-X | RT-2-X | Octo-Base | Octo-Small | RT-1(begin) | OpenVLA | RoboVLM |

total.metrics ADDED Viewed

	@@ -0,0 +1,230 @@

+***Pick coke can results***
+--------------------
+horizontal sim variant avg success {'ours': 0.7111111111111111}
+horizontal real success {'ours': 0.0}
+horizontal MMRV 0.0
+horizontal pearson correlation 1
+vertical sim variant avg success {'ours': 0.3244444444444444}
+vertical real success {'ours': 0.0}
+vertical MMRV 0.0
+vertical pearson correlation 1
+standing sim variant avg success {'ours': 0.8711111111111111}
+standing real success {'ours': 0.0}
+standing MMRV 0.0
+standing pearson correlation 1
+avg_orientation_sim_variant_results [0.6355555555555555]
+avg_orientation_real_results [0.0]
+mean_maximum_rank_violation(avg_orientation_sim_variant_results, avg_orientation_real_results) 0.0
+pearson_correlation(avg_orientation_sim_variant_results, avg_orientation_real_results) 1
+--------------------
+Orientation horizontal, ckpt ours all robot arm visual matching success: [0.52, 0.44, 0.4, 0.52]
+Orientation vertical, ckpt ours all robot arm visual matching success: [0.12, 0.16, 0.16, 0.08]
+Orientation standing, ckpt ours all robot arm visual matching success: [0.96, 0.96, 0.96, 0.92]
+horizontal visual matching sim success {'ours': 0.47}
+horizontal real success {'ours': 0.0}
+horizontal MMRV 0.0
+horizontal pearson correlation 1
+horizontal kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=15.473684210526313, pvalue=8.36619614588189e-05)
+vertical visual matching sim success {'ours': 0.13}
+vertical real success {'ours': 0.0}
+vertical MMRV 0.0
+vertical pearson correlation 1
+vertical kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=3.1276595744679274, pvalue=0.07697417298127443)
+standing visual matching sim success {'ours': 0.95}
+standing real success {'ours': 0.0}
+standing MMRV 0.0
+standing pearson correlation 1
+standing kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=45.23076923076922, pvalue=1.7513181713489203e-11)
+avg_orientation_sim_visual_matching_results [0.5166666666666666]
+avg_orientation_real_results [0.0]
+mean_maximum_rank_violation(avg_orientation_sim_visual_matching_results, avg_orientation_real_results) 0.0
+pearson_correlation(avg_orientation_sim_visual_matching_results, avg_orientation_real_results) 1
+avg kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=52.35135135135139, pvalue=4.640791164425619e-13)
+********************
+***Move Near results***
+--------------------
+sim variant avg success {'ours': 0.51}
+real success {'ours': 0.0}
+MMRV 0.0
+pearson correlation 1
+--------------------
+Ckpt ours all robot arm visual matching success: [0.48, 0.52, 0.52, 0.64]
+sim visual matching success {'ours': 0.54}
+real success {'ours': 0.0}
+visual matching MMRV 0.0
+visual matching pearson correlation 1
+avg kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=43.27272727272723, pvalue=4.76174527067639e-11)
+********************
+***Drawer results***
+--------------------
+open sim variant avg success {'ours': 0.08465608465608465}
+open real success {'ours': 0.0}
+open MMRV 0.0
+open pearson correlation 1
+close sim variant avg success {'ours': 0.1798941798941799}
+close real success {'ours': 0.0}
+close MMRV 0.0
+close pearson correlation 1
+avg_sim_variant_results [0.13227513227513227]
+avg_real_results [0.0]
+mean_maximum_rank_violation(avg_sim_variant_results, avg_real_results) 0.0
+pearson_correlation(avg_sim_variant_results, avg_real_results) 1
+--------------------
+Drawer task open, ckpt ours all robot arm visual matching success: [0.25, 0.25, 0.3055555555555555, 0.25925925925925924]
+Drawer task close, ckpt ours all robot arm visual matching success: [0.2777777777777778, 0.3055555555555556, 0.2777777777777778, 0.25925925925925924]
+open visual matching sim success {'ours': 0.26851851851851855}
+open real success {'ours': 0.0}
+open MMRV 0.0
+open pearson correlation 1
+open kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=7.8936170212766354, pvalue=0.004960955562494715)
+close visual matching sim success {'ours': 0.28703703703703703}
+close real success {'ours': 0.0}
+close MMRV 0.0
+close pearson correlation 1
+close kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=9.217391304347847, pvalue=0.0023972688480227177)
+avg_sim_visual_matching_results [0.2777777777777778]
+avg_real_results [0.0]
+mean_maximum_rank_violation(avg_sim_visual_matching_results, avg_real_results) 0.0
+pearson_correlation(avg_sim_visual_matching_results, avg_real_results) 1
+avg kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=17.258064516129142, pvalue=3.263095871941237e-05)
+********************
+***Drawer results***
+--------------------
+put_apple_into_top_drawer sim variant avg success {'ours': 0.022857142857142857}
+put_apple_into_top_drawer real success {'ours': 0.0}
+put_apple_into_top_drawer MMRV 0.0
+put_apple_into_top_drawer pearson correlation 1
+avg_sim_variant_results [0.022857142857142857]
+avg_real_results [0.0]
+mean_maximum_rank_violation(avg_sim_variant_results, avg_real_results) 0.0
+pearson_correlation(avg_sim_variant_results, avg_real_results) 1
+--------------------
+Drawer task put_apple_into_top_drawer, ckpt ours all robot arm visual matching success: [0.07407407407407407, 0.04938271604938271, 0.037037037037037035, 0.0]
+put_apple_into_top_drawer visual matching sim success {'ours': 0.07407407407407407}
+put_apple_into_top_drawer real success {'ours': 0.0}
+put_apple_into_top_drawer MMRV 0.0
+put_apple_into_top_drawer pearson correlation 1
+put_apple_into_top_drawer kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=2.038461538461544, pvalue=0.15336468852414337)
+avg_sim_visual_matching_results [0.07407407407407407]
+avg_real_results [0.0]
+mean_maximum_rank_violation(avg_sim_visual_matching_results, avg_real_results) 0.0
+pearson_correlation(avg_sim_visual_matching_results, avg_real_results) 1
+avg kruskal:
+       each checkpoint kruskal:
+             KruskalResult(statistic=2.038461538461544, pvalue=0.15336468852414337)
+********************
+***Bridge Put On Env results***
+********** Results for put_spoon_on_tablecloth **********
+WARNING: avg_sim_success is nan for results/fractal_60K0/bridge_table_1_v1/arm_pd_ee_target_delta_pose_align2_gripper_pd_joint_pos/PutSpoonOnTableClothInScene-v0
+sim visual matching partial success {'ours': nan}
+real partial success {'ours': 0.0}
+visual matching MMRV (partial success) 0.0
+visual matching pearson correlation (partial success)  nan
+avg kruskal (partial success):
+       each checkpoint kruskal:
+             all same, 1.0
+sim visual matching success {'ours': nan}
+real success {'ours': 0.0}
+visual matching MMRV 0.0
+visual matching pearson correlation nan
+avg kruskal:
+       each checkpoint kruskal:
+             all same, 1.0
+********************
+********** Results for put_carrot_on_plate **********
+WARNING: avg_sim_success is nan for results/fractal_60K0/bridge_table_1_v1/arm_pd_ee_target_delta_pose_align2_gripper_pd_joint_pos/PutCarrotOnPlateInScene-v0
+sim visual matching partial success {'ours': nan}
+real partial success {'ours': 0.0}
+visual matching MMRV (partial success) 0.0
+visual matching pearson correlation (partial success)  nan
+avg kruskal (partial success):
+       each checkpoint kruskal:
+             all same, 1.0
+sim visual matching success {'ours': nan}
+real success {'ours': 0.0}
+visual matching MMRV 0.0
+visual matching pearson correlation nan
+avg kruskal:
+       each checkpoint kruskal:
+             all same, 1.0
+********************
+********** Results for stack_green_block_on_yellow_block **********
+WARNING: avg_sim_success is nan for results/fractal_60K0/bridge_table_1_v1/arm_pd_ee_target_delta_pose_align2_gripper_pd_joint_pos/StackGreenCubeOnYellowCubeBakedTexInScene-v0
+sim visual matching partial success {'ours': nan}
+real partial success {'ours': 0.0}
+visual matching MMRV (partial success) 0.0
+visual matching pearson correlation (partial success)  nan
+avg kruskal (partial success):
+       each checkpoint kruskal:
+             all same, 1.0
+sim visual matching success {'ours': nan}
+real success {'ours': 0.0}
+visual matching MMRV 0.0
+visual matching pearson correlation nan
+avg kruskal:
+       each checkpoint kruskal:
+             all same, 1.0
+********************
+********** Results for put_eggplant_in_basket **********
+WARNING: avg_sim_success is nan for results/fractal_60K0/bridge_table_1_v2/arm_pd_ee_target_delta_pose_align2_gripper_pd_joint_pos/PutEggplantInBasketScene-v0
+sim visual matching partial success {'ours': nan}
+real partial success {'ours': 0.0}
+visual matching MMRV (partial success) 0.0
+visual matching pearson correlation (partial success)  nan
+avg kruskal (partial success):
+       each checkpoint kruskal:
+             all same, 1.0
+sim visual matching success {'ours': nan}
+real success {'ours': 0.0}
+visual matching MMRV 0.0
+visual matching pearson correlation nan
+avg kruskal:
+       each checkpoint kruskal:
+             all same, 1.0
+********************
+   coke_can/matching_avg  coke_can/variant_avg  ...  apple_in_drawer/variant_avg     ckpt_name
+0               0.516667              0.635556  ...                     0.022857  fractal_60K0
+[1 rows x 27 columns]

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a753302bbcc9e7ffb9a6e28db12c62e43af074904fffb7c772eec27696c62f0
+size 5368