{
  "_class_name": "ControlNetSVDModel",
  "_diffusers_version": "0.24.0",
  "addition_time_embed_dim": 256,
  "block_out_channels": [
    320,
    640,
    1280,
    1280
  ],
  "conditioning_channels": 3,
  "conditioning_embedding_out_channels": [
    16,
    32,
    96,
    256
  ],
  "cross_attention_dim": 1024,
  "down_block_types": [
    "CrossAttnDownBlockSpatioTemporal",
    "CrossAttnDownBlockSpatioTemporal",
    "CrossAttnDownBlockSpatioTemporal",
    "DownBlockSpatioTemporal"
  ],
  "feature_channels": 160,
  "feature_out_channels": [
    160,
    160,
    256,
    256
  ],
  "in_channels": 9,
  "layers_per_block": 2,
  "num_attention_heads": [
    5,
    10,
    20,
    20
  ],
  "num_frames": 25,
  "out_channels": 4,
  "projection_class_embeddings_input_dim": 768,
  "sample_size": 96,
  "transformer_layers_per_block": 1,
  "up_block_types": [
    "UpBlockSpatioTemporal",
    "CrossAttnUpBlockSpatioTemporal",
    "CrossAttnUpBlockSpatioTemporal",
    "CrossAttnUpBlockSpatioTemporal"
  ],
  "with_id_feature": false
}