# Model repository: StarVLA / Qwen2.5-VL-GR00T-LIBERO-4in1
# File: config.yaml (3.08 kB)
# Last commit: 9c312b7 (verified) by Jinhuiye, 29 days before this copy was taken:
#   "Add files using upload-large-folder tool"

	run_id: 1025_libero_all_qwengroot
	run_root_dir: ./results/Checkpoints
	seed: 42
	trackers:
	- jsonl
	- wandb
	wandb_entity: jinhuiye
	wandb_project: StarVLA_Libero
	is_debug: false
	framework:
	name: QwenGR00T
	qwenvl:
	base_vlm: ./playground/Pretrained_models/Qwen2.5-VL-3B-Instruct
	attn_implementation: flash_attention_2
	vl_hidden_dim: 2048
	dino:
	dino_backbone: dinov2_vits14
	action_model:
	action_model_type: DiT-B
	action_hidden_dim: 1024
	hidden_size: 1024
	add_pos_embed: true
	max_seq_len: 1024
	action_dim: 7
	state_dim: 7
	future_action_window_size: 7
	action_horizon: 8
	past_action_window_size: 0
	repeated_diffusion_steps: 8
	noise_beta_alpha: 1.5
	noise_beta_beta: 1.0
	noise_s: 0.999
	num_timestep_buckets: 1000
	num_inference_timesteps: 4
	num_target_vision_tokens: 32
	diffusion_model_cfg:
	cross_attention_dim: 2048
	dropout: 0.2
	final_dropout: true
	interleave_self_attention: true
	norm_type: ada_norm
	num_layers: 16
	output_dim: 1024
	positional_embeddings: null
	reduce_in_full_precision: true
	datasets:
	vlm_data:
	dataset_py: vlm_datasets
	dataformat: llava_json
	dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
	eval_dataset: aokvqa_cauldron_llava_format
	data_flatten: false
	base_interval: 2
	max_pixels: 12845056
	min_pixels: 3136
	model_max_length: 2048
	model_type: qwen2.5vl
	per_device_batch_size: 3
	vla_data:
	dataset_py: lerobot_datasets
	data_root_dir: playground/Datasets/LEROBOT_LIBERO_DATA
	data_mix: libero_all
	action_type: delta_qpos
	CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
	Locate their bounding boxes in [x1,y1,x2,y2] format.
	CoT_answer: bbox
	default_image_resolution:
	- 3
	- 224
	- 224
	per_device_batch_size: 16
	load_all_data_for_training: true
	obs:
	- image_0
	trainer:
	epochs: 100
	max_train_steps: 80000
	num_warmup_steps: 5000
	save_interval: 10000
	eval_interval: 1000
	learning_rate:
	base: 3.0e-05
	qwen_vl_interface: 1.0e-05
	action_model: 0.0001
	lr_scheduler_type: cosine_with_min_lr
	scheduler_specific_kwargs:
	min_lr: 1.0e-06
	freeze_modules: true
	loss_scale:
	vla: 1.0
	vlm: 0.1
	max_grad_norm: 1.0
	warmup_ratio: 0.1
	weight_decay: 0.0
	logging_frequency: 10
	gradient_clipping: 1.0
	gradient_accumulation_steps: 1
	optimizer:
	name: AdamW
	betas:
	- 0.9
	- 0.95
	eps: 1.0e-08
	weight_decay: 1.0e-08
	is_resume: false
	resume_epoch: null
	resume_step: null
	enable_gradient_checkpointing: true
	enable_mixed_precision_training: true
	is_resume: false
	output_dir: ./results/Checkpoints/1025_libero_all_qwengroot