{
  "train": {
    "project": "LunarLanderContinuous-v2-QGPO-VPSDE",
    "device": "cuda",
    "wandb": {
      "project": "IQL-LunarLanderContinuous-v2-QGPO-VPSDE"
    },
    "simulator": {
      "type": "GymEnvSimulator",
      "args": {
        "env_id": "LunarLanderContinuous-v2"
      }
    },
    "model": {
      "QGPOPolicy": {
        "device": "cuda",
        "critic": {
          "device": "cuda",
          "q_alpha": 1.0,
          "DoubleQNetwork": {
            "backbone": {
              "type": "ConcatenateMLP",
              "args": {
                "hidden_sizes": [
                  10,
                  256,
                  256
                ],
                "output_size": 1,
                "activation": "relu"
              }
            }
          }
        },
        "diffusion_model": {
          "device": "cuda",
          "x_size": 2,
          "alpha": 1.0,
          "solver": {
            "type": "DPMSolver",
            "args": {
              "order": 2,
              "device": "cuda",
              "steps": 17
            }
          },
          "path": {
            "type": "linear_vp_sde",
            "beta_0": 0.1,
            "beta_1": 20.0
          },
          "reverse_path": {
            "type": "linear_vp_sde",
            "beta_0": 0.1,
            "beta_1": 20.0
          },
          "model": {
            "type": "noise_function",
            "args": {
              "t_encoder": {
                "type": "GaussianFourierProjectionTimeEncoder",
                "args": {
                  "embed_dim": 32,
                  "scale": 30.0
                }
              },
              "backbone": {
                "type": "TemporalSpatialResidualNet",
                "args": {
                  "hidden_sizes": [
                    512,
                    256,
                    128
                  ],
                  "output_dim": 2,
                  "t_dim": 32,
                  "condition_dim": 8,
                  "condition_hidden_dim": 32,
                  "t_condition_hidden_dim": 128
                }
              }
            }
          },
          "energy_guidance": {
            "t_encoder": {
              "type": "GaussianFourierProjectionTimeEncoder",
              "args": {
                "embed_dim": 32,
                "scale": 30.0
              }
            },
            "backbone": {
              "type": "ConcatenateMLP",
              "args": {
                "hidden_sizes": [
                  42,
                  256,
                  256
                ],
                "output_size": 1,
                "activation": "silu"
              }
            }
          }
        }
      }
    },
    "parameter": {
      "behaviour_policy": {
        "batch_size": 1024,
        "learning_rate": 0.0001,
        "epochs": 500
      },
      "action_augment_num": 16,
      "fake_data_t_span": null,
      "energy_guided_policy": {
        "batch_size": 256
      },
      "critic": {
        "stop_training_epochs": 500,
        "learning_rate": 0.0001,
        "discount_factor": 0.99,
        "update_momentum": 0.005
      },
      "energy_guidance": {
        "epochs": 1000,
        "learning_rate": 0.0001
      },
      "evaluation": {
        "evaluation_interval": 50,
        "guidance_scale": [
          0.0,
          1.0,
          2.0
        ]
      },
      "checkpoint_path": "./LunarLanderContinuous-v2-QGPO"
    }
  },
  "deploy": {
    "device": "cuda",
    "env": {
      "env_id": "LunarLanderContinuous-v2",
      "seed": 0
    },
    "num_deploy_steps": 1000,
    "t_span": null
  }
}