{
  "architectures": [
    "SpatialLMLlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128008,
    128009
  ],
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "spatiallm_llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "point_backbone": "sonata",
  "point_config": {
    "enc_channels": [
      48,
      96,
      192,
      384,
      512
    ],
    "enc_depths": [
      3,
      3,
      3,
      12,
      3
    ],
    "enc_mode": true,
    "enc_num_head": [
      3,
      6,
      12,
      24,
      32
    ],
    "enc_patch_size": [
      1024,
      1024,
      1024,
      1024,
      1024
    ],
    "in_channels": 6,
    "mask_token": true,
    "mlp_ratio": 4,
    "num_bins": 1280,
    "order": [
      "z",
      "z-trans",
      "hilbert",
      "hilbert-trans"
    ],
    "stride": [
      2,
      2,
      2,
      2
    ]
  },
  "point_end_token_id": 128012,
  "point_start_token_id": 128011,
  "point_token_id": 128013,
  "pretraining_tp": 1,
  "projector": "mlp",
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.46.1",
  "use_cache": false,
  "vocab_size": 128256
}