{
  "architectures": [
    "SigLIP"
  ],
  "auto_map": {
    "AutoConfig": "marqo_fashionSigLIP.MarqoFashionSigLIPConfig",
    "AutoModel": "marqo_fashionSigLIP.MarqoFashionSigLIP",
    "AutoProcessor": "marqo_fashionSigLIP.MarqoFashionSigLIPProcessor"
  },
  "open_clip_model_name": "hf-hub:pySilver/marqo-fashionSigLIP-ST",
  "model_type": "siglip",
  "hidden_size": 768,
  "projection_dim": 768,
  "text_config": {
    "attention_dropout": 0.0,
    "bos_token_id": 49406,
    "eos_token_id": 49407,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 768,
    "intermediate_size": 3072,
    "layer_norm_eps": 1e-6,
    "max_position_embeddings": 64,
    "model_type": "siglip_text_model",
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "pad_token_id": 1,
    "transformers_version": "4.47.1",
    "vocab_size": 32000
  },
  "vision_config": {
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 768,
    "image_size": 224,
    "intermediate_size": 3072,
    "layer_norm_eps": 1e-6,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 12,
    "num_channels": 3,
    "num_hidden_layers": 12,
    "patch_size": 16,
    "transformers_version": "4.47.1"
  },
  "initializer_factor": 1.0,
  "return_dict": true,
  "output_hidden_states": false,
  "output_attentions": false,
  "torchscript": false,
  "use_bfloat16": false,
  "tie_word_embeddings": true
}