| { | |
| "architectures": [ | |
| "CMANet" | |
| ], | |
| "model_cfg": { | |
| "checkpoint_folder": "data/checkpoints/20250619_cma_train/ckpts", | |
| "eval": { | |
| "action": null, | |
| "ckpt_to_load": "", | |
| "len_traj_act": null, | |
| "max_steps": 195, | |
| "num_sample": null, | |
| "pm_threshold": null, | |
| "rotation_threshold": null, | |
| "sample": false, | |
| "save_results": true, | |
| "split": [ | |
| "val_seen" | |
| ], | |
| "start_eval_epoch": -1, | |
| "step_interval": 50, | |
| "stop_mode": null, | |
| "success_distance": 3.0, | |
| "use_ckpt_config": false | |
| }, | |
| "il": { | |
| "batch_size": 2, | |
| "camera_name": "pano_camera_0", | |
| "ckpt_to_load": "", | |
| "dataset_3dgs_root_dir": "data/datasets/3dgs", | |
| "dataset_grutopia10_root_dir": "data/datasets/grutopia10", | |
| "dataset_r2r_root_dir": "data/datasets/R2R_VLNCE_v1-3_preprocessed", | |
| "epochs": 80, | |
| "filter_failure": { | |
| "min_rgb_nums": 15, | |
| "use": true | |
| }, | |
| "inflection_weight_coef": 3.2, | |
| "lmdb_features_dir": "data/sample_episodes/20250211_sample_origin/sample_data.lmdb", | |
| "lmdb_map_size": 1000000000000.0, | |
| "load_from_ckpt": false, | |
| "load_from_pretrain": false, | |
| "loss": { | |
| "alpha": 0.0001, | |
| "dist_scale": 1 | |
| }, | |
| "lr": 0.0001, | |
| "num_workers": 8, | |
| "report_to": "wandb", | |
| "save_filter_frozen_weights": false, | |
| "save_interval_epochs": 5, | |
| "save_interval_steps": null, | |
| "use_descrete_dataset": null, | |
| "use_iw": true, | |
| "warmup_ratio": 0.05, | |
| "weight_decay": 1e-05 | |
| }, | |
| "local_rank": 0, | |
| "log_dir": "data/checkpoints/20250619_cma_train/logs", | |
| "model": { | |
| "ablate_depth": null, | |
| "ablate_instruction": null, | |
| "ablate_rgb": null, | |
| "cross_modal_encoder": null, | |
| "depth_encoder": { | |
| "backbone": "resnet50", | |
| "cnn_type": "VlnResnetDepthEncoder", | |
| "ddppo_checkpoint": "data/ddppo-models/gibson-4plus-mp3d-train-val-test-resnet50.pth", | |
| "output_size": 128, | |
| "trainable": false | |
| }, | |
| "diffusion_policy": null, | |
| "distance_predictor": null, | |
| "image_encoder": null, | |
| "imu_encoder": null, | |
| "instruction_encoder": { | |
| "bidirectional": true, | |
| "dataset_vocab": "data/datasets/R2R_VLNCE_v1-3_preprocessed/train/train.json.gz", | |
| "embedding_file": "data/datasets/R2R_VLNCE_v1-3_preprocessed/embeddings.json.gz", | |
| "embedding_size": 50, | |
| "final_state_only": true, | |
| "fine_tune_embeddings": false, | |
| "hidden_size": 128, | |
| "load_model": null, | |
| "max_length": null, | |
| "rnn_type": "LSTM", | |
| "sensor_uuid": "instruction", | |
| "use_pretrained_embeddings": true, | |
| "vocab_size": 2504 | |
| }, | |
| "learn_angle": null, | |
| "len_traj_act": 4, | |
| "max_step": 200, | |
| "normalize_rgb": null, | |
| "policy_name": "CMA_Policy", | |
| "prev_action_encoder": null, | |
| "progress_monitor": { | |
| "alpha": 1.0, | |
| "concat_state_txt": null, | |
| "use": true | |
| }, | |
| "rgb_encoder": { | |
| "cnn_type": "TorchVisionResNet50", | |
| "output_size": 256, | |
| "trainable": false | |
| }, | |
| "seq2seq": null, | |
| "state_encoder": { | |
| "dropout_rate": null, | |
| "hidden_size": 512, | |
| "num_recurrent_layers": 2, | |
| "rgb_depth_embed_method": null, | |
| "rnn_type": "GRU", | |
| "use_dropout": null | |
| }, | |
| "stop_progress_predictor": null, | |
| "text_encoder": null | |
| }, | |
| "model_name": "cma", | |
| "name": "20250619_cma_train", | |
| "num_gpus": 1, | |
| "output_dir": "data/checkpoints/20250619_cma_train/ckpts", | |
| "seed": 0, | |
| "tensorboard_dir": "data/checkpoints/20250619_cma_train/tensorboard", | |
| "torch_gpu_id": 0, | |
| "torch_gpu_ids": [ | |
| 0 | |
| ], | |
| "world_size": 1 | |
| }, | |
| "model_type": "cma", | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.26.1" | |
| } | |