| { | |
| "architectures": [ | |
| "RDPNet" | |
| ], | |
| "model_cfg": { | |
| "checkpoint_folder": "data/checkpoints/20250619_rdp_train_new/ckpts", | |
| "eval": { | |
| "action": "descrete", | |
| "ckpt_to_load": null, | |
| "len_traj_act": 2, | |
| "max_steps": 195, | |
| "num_sample": 1, | |
| "pm_threshold": 0.9, | |
| "rotation_threshold": 0.01, | |
| "sample": true, | |
| "save_results": true, | |
| "split": [ | |
| "val_unseen" | |
| ], | |
| "start_eval_epoch": -1, | |
| "step_interval": 80, | |
| "stop_mode": "stop_progress", | |
| "success_distance": 3.0, | |
| "use_ckpt_config": false | |
| }, | |
| "il": { | |
| "batch_size": 4, | |
| "camera_name": "pano_camera_0", | |
| "ckpt_to_load": "", | |
| "dataset_3dgs_root_dir": "data/datasets/3dgs", | |
| "dataset_grutopia10_root_dir": "data/datasets/grutopia10", | |
| "dataset_r2r_root_dir": "data/datasets/R2R_VLNCE_v1-3_corrected", | |
| "epochs": 50, | |
| "filter_failure": { | |
| "min_rgb_nums": 15, | |
| "use": true | |
| }, | |
| "inflection_weight_coef": null, | |
| "lmdb_features_dir": "data/sample_episodes/20250211_sample_origin/sample_data.lmdb", | |
| "load_from_ckpt": false, | |
| "load_from_pretrain": true, | |
| "loss": { | |
| "alpha": 0.0001, | |
| "dist_scale": 1 | |
| }, | |
| "lr": 0.0001, | |
| "num_workers": 8, | |
| "report_to": "wandb", | |
| "save_filter_frozen_weights": true, | |
| "save_interval_epochs": 5, | |
| "save_interval_steps": null, | |
| "use_descrete_dataset": true, | |
| "use_iw": null, | |
| "warmup_ratio": 0.1, | |
| "weight_decay": 0.0001 | |
| }, | |
| "local_rank": 0, | |
| "log_dir": "data/checkpoints/20250619_rdp_train_new/logs", | |
| "model": { | |
| "ablate_depth": null, | |
| "ablate_instruction": null, | |
| "ablate_rgb": null, | |
| "cross_modal_encoder": { | |
| "hidden_size": 512, | |
| "input_type": 3, | |
| "load_model": false, | |
| "num_attention_heads": 8, | |
| "num_x_layers": 2, | |
| "txt_to_img": true, | |
| "txt_to_img_layer": 2 | |
| }, | |
| "depth_encoder": null, | |
| "diffusion_policy": { | |
| "action_stats": { | |
| "max": [ | |
| 0.25, | |
| 0.25, | |
| 0.27 | |
| ], | |
| "min": [ | |
| -0.25, | |
| -0.25, | |
| -0.27 | |
| ] | |
| }, | |
| "clip_sample": true, | |
| "cls_free_guidance_scale": 1.5, | |
| "cls_mask_method": "mask_token", | |
| "cls_mask_ratio": 0.25, | |
| "len_traj_pred": 8, | |
| "metric_waypoint_spacing": 1, | |
| "num_diffusion_iters": 20, | |
| "pred_type": "epsilon", | |
| "random_mask_instr": true, | |
| "random_mask_rgb": true, | |
| "scheduler": "DDPM", | |
| "stop_weight": 1, | |
| "transformer_encoding_size": 512, | |
| "transformer_n_cond_layers": 1, | |
| "transformer_n_layers": 3, | |
| "transformer_p_drop_emb": 0.2, | |
| "txt_len": 80, | |
| "type": "transformer", | |
| "use": true, | |
| "use_cls_free_guidance": true, | |
| "waypoint_spacing": 1 | |
| }, | |
| "distance_predictor": { | |
| "normalize": false, | |
| "use": false | |
| }, | |
| "eval": { | |
| "action": "descrete", | |
| "auto_remove": false, | |
| "ckpt_to_load": null, | |
| "distance_threshold": 1.5, | |
| "episode_count": -1, | |
| "len_traj_act": 4, | |
| "load_eval_subset": true, | |
| "max_len_traj_act": 8, | |
| "max_steps": 195, | |
| "min_displacement": 0.15, | |
| "min_len_traj_act": 3, | |
| "num_sample": 1, | |
| "pm_threshold": 0.9, | |
| "re_eval": false, | |
| "rotation_threshold": 0.01, | |
| "sample": null, | |
| "save_results": true, | |
| "sim_cfg_file": "vln/configs/sim_cfg_policy_h1_eval.yaml", | |
| "split": [ | |
| "val_unseen" | |
| ], | |
| "start_eval_epoch": -1, | |
| "step_interval": 80, | |
| "stop_mode": "stop_progress", | |
| "stop_progress_threshold": 0.85, | |
| "stop_x_threshold": 0.015, | |
| "stop_y_threshold": 0.015, | |
| "stop_yaw_threshold": 0.05, | |
| "success_distance": 3.0, | |
| "train_eval_interval": 100, | |
| "use_ckpt_config": false, | |
| "use_dynamic_len_traj_act": false, | |
| "vln_cfg_file": "vln/configs/vln_cfg_policy_eval.yaml" | |
| }, | |
| "image_encoder": { | |
| "depth": { | |
| "backbone": "resnet50", | |
| "bottleneck": "resnet", | |
| "cnn_type": "VlnResnetDepthEncoder", | |
| "ddppo_checkpoint": "data/ddppo-models/gibson-4plus-mp3d-train-val-test-resnet50.pth", | |
| "feature_dim": 768, | |
| "load_model": true, | |
| "output_size": 128, | |
| "projection_dim": 512, | |
| "update_depth_encoder": false | |
| }, | |
| "dropout": 0.1, | |
| "env_drop": 0.3, | |
| "img_stack_nums": 4, | |
| "rgb": { | |
| "feature_dim": 768, | |
| "img_mod": "multi_patches_avg_pooling", | |
| "load_model": true, | |
| "model_name": "clip-long", | |
| "model_path": "data/pretrained/clip-long/longclip-B.pt", | |
| "multi_patches_num": 5, | |
| "projection_dim": 512, | |
| "rgb_proj": false, | |
| "update_rgb_encoder": false | |
| }, | |
| "use_env_drop": true, | |
| "use_stack": false | |
| }, | |
| "imu_encoder": { | |
| "encoding_size": 64, | |
| "input_size": 3, | |
| "to_local_coords": true, | |
| "use": true | |
| }, | |
| "instruction_encoder": null, | |
| "learn_angle": true, | |
| "len_traj_act": 4, | |
| "max_step": 200, | |
| "normalize_rgb": null, | |
| "policy_name": "RDP_Policy", | |
| "prev_action_encoder": { | |
| "encoding_size": 64, | |
| "input_size": null, | |
| "to_local_coords": null, | |
| "type": "continuous", | |
| "use": null | |
| }, | |
| "progress_monitor": { | |
| "alpha": null, | |
| "concat_state_txt": true, | |
| "use": true | |
| }, | |
| "rgb_encoder": null, | |
| "seq2seq": null, | |
| "state_encoder": { | |
| "dropout_rate": 0.2, | |
| "hidden_size": 512, | |
| "num_recurrent_layers": 1, | |
| "rgb_depth_embed_method": "flat", | |
| "rnn_type": "GRU", | |
| "use_dropout": false | |
| }, | |
| "stop_progress_predictor": { | |
| "concat_state_txt": true, | |
| "loss_alpha": 10, | |
| "type": "continuous", | |
| "use": true | |
| }, | |
| "text_encoder": { | |
| "ablate": null, | |
| "embedding_size": 512, | |
| "eot_token": 49407, | |
| "final_state_only": null, | |
| "hidden_size": 512, | |
| "load_model": true, | |
| "max_length": 248, | |
| "model_name": "clip-long", | |
| "model_path": "data/pretrained/clip-long/longclip-B.pt", | |
| "num_l_layers": 6, | |
| "pad_token": 0, | |
| "sot_token": 49406, | |
| "type": "clip-long", | |
| "update_text_encoder": false, | |
| "vocab_size": 50265 | |
| }, | |
| "use_iw": false | |
| }, | |
| "model_name": "rdp", | |
| "name": "20250619_rdp_train_new", | |
| "num_gpus": 4, | |
| "output_dir": "data/checkpoints/20250619_rdp_train_new/ckpts", | |
| "seed": 0, | |
| "tensorboard_dir": "data/checkpoints/20250619_rdp_train_new/tensorboard", | |
| "torch_gpu_id": 0, | |
| "torch_gpu_ids": [ | |
| 0, | |
| 1, | |
| 2, | |
| 3 | |
| ], | |
| "world_size": 4 | |
| }, | |
| "model_type": "rdp", | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.26.1" | |
| } | |