Qwen-GR00T-Bridge / config.json
Jinhuiye's picture
Update config.json
48b1db4 verified
{
"run_id": "1008_qwenLfm_briage",
"run_root_dir": "./results/Checkpoints",
"seed": 42,
"trackers": [
"jsonl",
"wandb"
],
"wandb_entity": "jinhuiye",
"wandb_project": "InternM1",
"is_debug": false,
"framework": {
"name": "QwenGR00T",
"qwenvl": {
"base_vlm": "./playground/Pretrained_models/Qwen2.5-VL-3B-Instruct",
"attn_implementation": "flash_attention_2",
"vl_hidden_dim": 2048
},
"dino": {
"dino_backbone": "dinov2_vits14"
},
"action_model": {
"action_model_type": "DiT-L",
"hidden_size": 1024,
"add_pos_embed": true,
"max_seq_len": 1024,
"action_dim": 7,
"state_dim": 7,
"future_action_window_size": 15,
"action_horizon": 16,
"past_action_window_size": 0,
"repeated_diffusion_steps": 8,
"noise_beta_alpha": 1.5,
"noise_beta_beta": 1.0,
"noise_s": 0.999,
"num_timestep_buckets": 1000,
"num_inference_timesteps": 4,
"num_target_vision_tokens": 32,
"diffusion_model_cfg": {
"cross_attention_dim": 2048,
"dropout": 0.2,
"final_dropout": true,
"interleave_self_attention": true,
"norm_type": "ada_norm",
"num_layers": 16,
"output_dim": 1024,
"positional_embeddings": null
},
"action_hidden_dim": 2048
}
},
"datasets": {
"vlm_data": {
"dataset_py": "vlm_datasets",
"dataformat": "llava_json",
"dataset_use": "aokvqa_cauldron_llava_format,sharegpt4v_coco,sharegpt4v_knowledge,sharegpt4v_llava,sharegpt4v_sam,asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en",
"eval_dataset": "aokvqa_cauldron_llava_format",
"data_flatten": false,
"base_interval": 2,
"max_pixels": 50176,
"min_pixels": 784,
"model_max_length": 2048,
"model_type": "qwen2.5vl",
"per_device_batch_size": 3
},
"vla_data": {
"dataset_py": "lerobot_datasets",
"data_root_dir": "playground/Datasets/OXE_LEROBOT",
"data_mix": "bridge",
"action_type": "delta_ee",
"CoT_prompt": "Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.",
"CoT_answer": "bbox",
"default_image_resolution": [
3,
224,
224
],
"per_device_batch_size": 16,
"load_all_data_for_training": true,
"obs": [
"image_0"
],
"image_size": [
224,
224
]
}
},
"trainer": {
"epochs": 100,
"max_train_steps": 100000,
"num_warmup_steps": 10000,
"save_interval": 5000,
"eval_interval": 1000,
"learning_rate": {
"base": 3e-05,
"qwen_vl_interface": 1e-05,
"action_model": 0.0001
},
"lr_scheduler_type": "cosine_with_min_lr",
"scheduler_specific_kwargs": {
"min_lr": 5e-07
},
"freeze_modules": true,
"loss_scale": {
"vla": 1.0,
"vlm": 0.1
},
"repeated_diffusion_steps": 4,
"max_grad_norm": 1.0,
"warmup_ratio": 0.1,
"weight_decay": 0.0,
"logging_frequency": 10,
"gradient_clipping": 1.0,
"gradient_accumulation_steps": 1,
"optimizer": {
"name": "AdamW",
"betas": [
0.9,
0.95
],
"eps": 1e-08,
"weight_decay": 1e-08
},
"is_resume": false,
"resume_epoch": null,
"resume_step": null,
"enable_gradient_checkpointing": true,
"enable_mixed_precision_training": true
},
"is_resume": false,
"output_dir": "./results/Checkpoints/1008_qwenLfm_briage"
}