{
  "architectures": [
    "PerceiverMaskedLanguageModel"
  ],
  "model_config": {
    "activation_checkpointing": false,
    "activation_offloading": false,
    "decoder": {
      "cross_attention_residual": false,
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "freeze": false,
      "init_scale": 0.02,
      "max_seq_len": 2048,
      "num_cross_attention_heads": 8,
      "num_cross_attention_qk_channels": 256,
      "num_cross_attention_v_channels": 768,
      "num_output_query_channels": null,
      "vocab_size": 262
    },
    "encoder": {
      "cross_attention_widening_factor": 1,
      "dropout": 0.1,
      "first_cross_attention_layer_shared": false,
      "first_self_attention_block_shared": true,
      "freeze": false,
      "init_scale": 0.02,
      "max_seq_len": 2048,
      "num_cross_attention_heads": 8,
      "num_cross_attention_layers": 1,
      "num_cross_attention_qk_channels": 256,
      "num_cross_attention_v_channels": 1280,
      "num_input_channels": 768,
      "num_self_attention_blocks": 1,
      "num_self_attention_heads": 8,
      "num_self_attention_layers_per_block": 26,
      "num_self_attention_qk_channels": 256,
      "num_self_attention_v_channels": 1280,
      "params": null,
      "self_attention_widening_factor": 1,
      "vocab_size": 262
    },
    "num_latent_channels": 1280,
    "num_latents": 256
  },
  "model_type": "perceiver-io-masked-language-model",
  "tokenizer_class": "PerceiverTokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.28.0"
}