{ "_auto_class": null, "_name_or_path": "/scratch/giuliano_albanese/spear-hf", "architectures": [ "SPEAR1" ], "attribute_map": {}, "auto_map": { "AutoConfig": "configuration_spear.SPEAR1Config", "AutoModel": "modeling_spear.SPEAR1" }, "autoclass": "barrel.pipes.vlams.models.vlams.vlam.VLAM", "base_config_key": "", "control_module_config": { "control_decoder_config": { "block_config": { "activation": "GELU", "attn_implementation": "sdpa", "dropout": 0.0, "feature_size": 1024, "head_dim": 256, "hidden_size": 4096, "norm": "RMSNorm", "num_heads": 8, "num_kv_heads": 1, "position_embed_config": { "base": 10000, "cached": true, "embedding_dim": 256, "num_embeddings": 512 } }, "num_blocks": 18 }, "noised_control_proj_config": { "activation": "SiLU", "layers": [ 8, 2048, 1024, 1024 ], "norm": null, "time_embed": { "activation": "SiLU", "layers": [], "learnable_features": false, "max_period": 10000.0, "norm": null, "num_features": 1024 } }, "robot_state_proj_config": { "activation": "SiLU", "fourier": false, "layers": [ 8, 1024 ], "mode": "ee_pose_gripper" }, "rotation_components": 4, "token_size": 1024 }, "is_composition": false, "model_type": "spear1", "processor_config": { "control_io_config": { "future_control_offset_sec": 0.0, "future_controls_sequence_length": 5, "future_controls_sequence_stride_sec": 0.2, "future_frames_sequence_length": 1, "future_frames_sequence_stride_sec": null, "past_frames_sequence_length": 1, "past_frames_stride_sec": null, "past_scalars_sequence_length": 1, "past_scalars_stride_sec": null, "sequence_frames": 1, "sequence_frames_stride_sec": null }, "control_stats_path": "barrel/pipes/vlams/types/control_stats.yaml", "control_tokenizer_config": {}, "delta_controls": true, "distribution_hyperparams": { "alpha": 1.5, "beta": 1.0 }, "eef_control_frame": false, "image_resize": "smart", "joints_norm": { "high": [ 3.141592653589793, 3.141592653589793, 3.141592653589793, 3.141592653589793, 3.141592653589793, 3.141592653589793, 3.141592653589793 ], "low": [ -3.141592653589793, -3.141592653589793, -3.141592653589793, -3.141592653589793, -3.141592653589793, -3.141592653589793, -3.141592653589793 ] }, "num_inference_steps": 10, "obs_rotation_norm": "none", "obs_translation_norm": "bounds_q99", "observation_stats_path": "barrel/pipes/vlams/types/observation_stats.yaml", "r0_distribution": "uniform", "rotation_format": "quaternion", "rotation_norm": "none", "sig_min": 0.001, "timestep_distribution": "beta", "translation_norm": { "high": [ 0.04, 0.04, 0.04 ], "low": [ -0.04, -0.04, -0.04 ] } }, "sub_configs": {}, "torch_dtype": "float32", "transformers_version": "4.47.0", "vlm_config": { "attn_implementation": "flash_attention_2", "depth_tokens": 1024, "lm_head": false, "mean_resizing": false, "model_id": "google/paligemma-3b-mix-224", "paligemma_3d_config": { "depth_config": { "hf_filename": "moge/moge-vit-large-patch-14-backbone.pt", "hf_hub_repo": "nikonikolov/vlams" }, "depth_layers": 4, "depth_only": false, "mask_prob": 0.0, "projection": "features_add" }, "processor_config": { "image_sizes": { "main": { "height": 210, "width": 280 }, "wrist": { "height": 112, "width": 112 } }, "image_token": "", "max_language_tokens": 75 }, "train_only_depth_tokens": false } }