VLN-PE / r2r /fine_tuned /cma /config.json
zhaohui
add r2r models
23a3cfe
{
"architectures": [
"CMANet"
],
"model_cfg": {
"checkpoint_folder": "data/checkpoints/20250619_cma_train/ckpts",
"eval": {
"action": null,
"ckpt_to_load": "",
"len_traj_act": null,
"max_steps": 195,
"num_sample": null,
"pm_threshold": null,
"rotation_threshold": null,
"sample": false,
"save_results": true,
"split": [
"val_seen"
],
"start_eval_epoch": -1,
"step_interval": 50,
"stop_mode": null,
"success_distance": 3.0,
"use_ckpt_config": false
},
"il": {
"batch_size": 2,
"camera_name": "pano_camera_0",
"ckpt_to_load": "",
"dataset_3dgs_root_dir": "data/datasets/3dgs",
"dataset_grutopia10_root_dir": "data/datasets/grutopia10",
"dataset_r2r_root_dir": "data/datasets/R2R_VLNCE_v1-3_preprocessed",
"epochs": 80,
"filter_failure": {
"min_rgb_nums": 15,
"use": true
},
"inflection_weight_coef": 3.2,
"lmdb_features_dir": "data/sample_episodes/20250211_sample_origin/sample_data.lmdb",
"lmdb_map_size": 1000000000000.0,
"load_from_ckpt": false,
"load_from_pretrain": false,
"loss": {
"alpha": 0.0001,
"dist_scale": 1
},
"lr": 0.0001,
"num_workers": 8,
"report_to": "wandb",
"save_filter_frozen_weights": false,
"save_interval_epochs": 5,
"save_interval_steps": null,
"use_descrete_dataset": null,
"use_iw": true,
"warmup_ratio": 0.05,
"weight_decay": 1e-05
},
"local_rank": 0,
"log_dir": "data/checkpoints/20250619_cma_train/logs",
"model": {
"ablate_depth": null,
"ablate_instruction": null,
"ablate_rgb": null,
"cross_modal_encoder": null,
"depth_encoder": {
"backbone": "resnet50",
"cnn_type": "VlnResnetDepthEncoder",
"ddppo_checkpoint": "data/ddppo-models/gibson-4plus-mp3d-train-val-test-resnet50.pth",
"output_size": 128,
"trainable": false
},
"diffusion_policy": null,
"distance_predictor": null,
"image_encoder": null,
"imu_encoder": null,
"instruction_encoder": {
"bidirectional": true,
"dataset_vocab": "data/datasets/R2R_VLNCE_v1-3_preprocessed/train/train.json.gz",
"embedding_file": "data/datasets/R2R_VLNCE_v1-3_preprocessed/embeddings.json.gz",
"embedding_size": 50,
"final_state_only": true,
"fine_tune_embeddings": false,
"hidden_size": 128,
"load_model": null,
"max_length": null,
"rnn_type": "LSTM",
"sensor_uuid": "instruction",
"use_pretrained_embeddings": true,
"vocab_size": 2504
},
"learn_angle": null,
"len_traj_act": 4,
"max_step": 200,
"normalize_rgb": null,
"policy_name": "CMA_Policy",
"prev_action_encoder": null,
"progress_monitor": {
"alpha": 1.0,
"concat_state_txt": null,
"use": true
},
"rgb_encoder": {
"cnn_type": "TorchVisionResNet50",
"output_size": 256,
"trainable": false
},
"seq2seq": null,
"state_encoder": {
"dropout_rate": null,
"hidden_size": 512,
"num_recurrent_layers": 2,
"rgb_depth_embed_method": null,
"rnn_type": "GRU",
"use_dropout": null
},
"stop_progress_predictor": null,
"text_encoder": null
},
"model_name": "cma",
"name": "20250619_cma_train",
"num_gpus": 1,
"output_dir": "data/checkpoints/20250619_cma_train/ckpts",
"seed": 0,
"tensorboard_dir": "data/checkpoints/20250619_cma_train/tensorboard",
"torch_gpu_id": 0,
"torch_gpu_ids": [
0
],
"world_size": 1
},
"model_type": "cma",
"torch_dtype": "float32",
"transformers_version": "4.26.1"
}