krasserm
/

perceiver-io-mlm

perceiver-io-masked-language-model

Model card · Files and versions

krasserm committed on Apr 23, 2023

Commit

864295b

·

1 Parent(s): bb3c238

Upload model

Files changed (2) hide show

config.json +50 -0
pytorch_model.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "architectures": [
+    "PerceiverMaskedLanguageModel"
+  ],
+  "model_config": {
+    "activation_checkpointing": false,
+    "activation_offloading": false,
+    "decoder": {
+      "cross_attention_residual": false,
+      "cross_attention_widening_factor": 1,
+      "dropout": 0.1,
+      "freeze": false,
+      "init_scale": 0.02,
+      "max_seq_len": 2048,
+      "num_cross_attention_heads": 8,
+      "num_cross_attention_qk_channels": 256,
+      "num_cross_attention_v_channels": 768,
+      "num_output_query_channels": null,
+      "vocab_size": 262
+    },
+    "encoder": {
+      "cross_attention_widening_factor": 1,
+      "dropout": 0.1,
+      "first_cross_attention_layer_shared": false,
+      "first_self_attention_block_shared": true,
+      "freeze": false,
+      "init_scale": 0.02,
+      "max_seq_len": 2048,
+      "num_cross_attention_heads": 8,
+      "num_cross_attention_layers": 1,
+      "num_cross_attention_qk_channels": 256,
+      "num_cross_attention_v_channels": 1280,
+      "num_input_channels": 768,
+      "num_self_attention_blocks": 1,
+      "num_self_attention_heads": 8,
+      "num_self_attention_layers_per_block": 26,
+      "num_self_attention_qk_channels": 256,
+      "num_self_attention_v_channels": 1280,
+      "params": null,
+      "self_attention_widening_factor": 1,
+      "vocab_size": 262
+    },
+    "num_latent_channels": 1280,
+    "num_latents": 256
+  },
+  "model_type": "perceiver-io-masked-language-model",
+  "tokenizer_class": "PerceiverTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.0"
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b5372d77cd9d06848f0d184ec48d200c0e068c4c1355205448bce8798c22ba9
+size 804586481