{
  "adaptors_kwargs": {
    "flow": {
      "class": "FlowAdaptor",
      "kwargs": {
        "base_shape": [
          224,
          224
        ],
        "flow_mean": [
          0.0,
          0.0
        ],
        "flow_std": [
          25,
          25
        ],
        "name": "flow",
        "output_normalized_coordinate": false,
        "scale_strategy": "scale_both"
      }
    }
  },
  "detach_uncertainty_head": false,
  "encoder_kwargs": {
    "data_norm_type": "dinov2",
    "name": "dinov2_encoder",
    "patch_size": 14,
    "size": "large",
    "with_registers": false
  },
  "encoder_str": "dinov2",
  "feature_head_kwargs": {
    "dpt_feature": {
      "feature_dim": 256,
      "hooks": [
        0,
        1,
        2,
        3
      ],
      "input_feature_dims": [
        1024,
        768,
        768,
        768
      ],
      "layer_dims": [
        96,
        192,
        384,
        768
      ],
      "output_width_ratio": 1,
      "patch_size": 14,
      "pretrained_checkpoint_path": null,
      "use_bn": false
    },
    "dpt_processor": {
      "hidden_dims": [
        128,
        128
      ],
      "input_feature_dim": 256,
      "output_dim": 2,
      "pretrained_checkpoint_path": null
    }
  },
  "head_type": "dpt",
  "inference_resolution": [
    560,
    420
  ],
  "info_sharing_and_head_structure": "dual+single",
  "info_sharing_kwargs": {
    "attn_drop": 0.0,
    "custom_positional_encoding": null,
    "depth": 12,
    "dim": 768,
    "drop_path": 0.0,
    "gradient_checkpointing": false,
    "indices": [
      5,
      8
    ],
    "init_values": null,
    "input_embed_dim": 1024,
    "max_num_views": 2,
    "mlp_ratio": 4.0,
    "name": "global_attention",
    "norm_intermediate": true,
    "num_heads": 12,
    "pretrained_checkpoint_path": null,
    "proj_drop": 0.0,
    "qk_norm": false,
    "qkv_bias": true,
    "size": "base",
    "use_rand_idx_pe_for_non_reference_views": false
  },
  "info_sharing_str": "global_attention",
  "pretrained_backbone_checkpoint_path": null,
  "pretrained_checkpoint_path": null,
  "uncertainty_adaptors_kwargs": {
    "non_occluded_mask": {
      "class": "MaskAdaptor",
      "kwargs": {
        "name": "non_occluded_mask"
      }
    }
  },
  "uncertainty_head_kwargs": {
    "dpt_feature": {
      "feature_dim": 256,
      "hooks": [
        0,
        1,
        2,
        3
      ],
      "input_feature_dims": [
        1024,
        768,
        768,
        768
      ],
      "layer_dims": [
        96,
        192,
        384,
        768
      ],
      "output_width_ratio": 1,
      "patch_size": 14,
      "pretrained_checkpoint_path": null,
      "use_bn": false
    },
    "dpt_processor": {
      "hidden_dims": [
        128,
        128
      ],
      "input_feature_dim": 256,
      "output_dim": 1,
      "pretrained_checkpoint_path": null
    }
  },
  "uncertainty_head_type": "dpt"
}
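
The sketch below is a minimal, standard-library-only way to load and sanity-check this configuration. The filename `config.json` is an assumption for illustration; only keys that actually appear in the JSON above are accessed, and how the consuming model code interprets them is not asserted here.

```python
import json

# Load the configuration shown above (assumes it is saved as "config.json";
# the filename is an assumption for illustration only).
with open("config.json") as f:
    cfg = json.load(f)

# The DINOv2-large encoder and both DPT feature heads declare a patch size
# of 14; both entries of inference_resolution (560 and 420) divide evenly by it.
patch = cfg["encoder_kwargs"]["patch_size"]
assert all(dim % patch == 0 for dim in cfg["inference_resolution"])

# The flow processor outputs 2 channels, while the uncertainty processor
# outputs a single channel (paired here with the non_occluded_mask adaptor).
print("flow channels:", cfg["feature_head_kwargs"]["dpt_processor"]["output_dim"])
print("uncertainty channels:",
      cfg["uncertainty_head_kwargs"]["dpt_processor"]["output_dim"])

# Flow normalization parameters passed to the FlowAdaptor; their exact use
# is left to the consuming code.
flow_kwargs = cfg["adaptors_kwargs"]["flow"]["kwargs"]
print("flow mean/std:", flow_kwargs["flow_mean"], flow_kwargs["flow_std"],
      "at base shape", flow_kwargs["base_shape"])
```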