sam-paech committed on
Commit
db6d29e
·
verified ·
1 Parent(s): ecd2ddb

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -22,56 +22,6 @@
22
  "hidden_size": 3840,
23
  "initializer_range": 0.02,
24
  "intermediate_size": 15360,
25
- "layer_types": [
26
- "sliding_attention",
27
- "sliding_attention",
28
- "sliding_attention",
29
- "sliding_attention",
30
- "sliding_attention",
31
- "full_attention",
32
- "sliding_attention",
33
- "sliding_attention",
34
- "sliding_attention",
35
- "sliding_attention",
36
- "sliding_attention",
37
- "full_attention",
38
- "sliding_attention",
39
- "sliding_attention",
40
- "sliding_attention",
41
- "sliding_attention",
42
- "sliding_attention",
43
- "full_attention",
44
- "sliding_attention",
45
- "sliding_attention",
46
- "sliding_attention",
47
- "sliding_attention",
48
- "sliding_attention",
49
- "full_attention",
50
- "sliding_attention",
51
- "sliding_attention",
52
- "sliding_attention",
53
- "sliding_attention",
54
- "sliding_attention",
55
- "full_attention",
56
- "sliding_attention",
57
- "sliding_attention",
58
- "sliding_attention",
59
- "sliding_attention",
60
- "sliding_attention",
61
- "full_attention",
62
- "sliding_attention",
63
- "sliding_attention",
64
- "sliding_attention",
65
- "sliding_attention",
66
- "sliding_attention",
67
- "full_attention",
68
- "sliding_attention",
69
- "sliding_attention",
70
- "sliding_attention",
71
- "sliding_attention",
72
- "sliding_attention",
73
- "full_attention"
74
- ],
75
  "max_position_embeddings": 131072,
76
  "model_type": "gemma3_text",
77
  "num_attention_heads": 16,
@@ -87,13 +37,12 @@
87
  "rope_theta": 1000000.0,
88
  "sliding_window": 1024,
89
  "sliding_window_pattern": 6,
90
- "torch_dtype": "float16",
91
  "use_cache": true,
92
  "vocab_size": 262208
93
  },
94
- "tie_word_embeddings": false,
95
- "torch_dtype": "float16",
96
- "transformers_version": "4.53.0",
97
  "unsloth_fixed": true,
98
  "vision_config": {
99
  "attention_dropout": 0.0,
@@ -107,7 +56,7 @@
107
  "num_channels": 3,
108
  "num_hidden_layers": 27,
109
  "patch_size": 14,
110
- "torch_dtype": "float16",
111
  "vision_use_head": false
112
  }
113
  }
 
22
  "hidden_size": 3840,
23
  "initializer_range": 0.02,
24
  "intermediate_size": 15360,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  "max_position_embeddings": 131072,
26
  "model_type": "gemma3_text",
27
  "num_attention_heads": 16,
 
37
  "rope_theta": 1000000.0,
38
  "sliding_window": 1024,
39
  "sliding_window_pattern": 6,
40
+ "torch_dtype": "bfloat16",
41
  "use_cache": true,
42
  "vocab_size": 262208
43
  },
44
+ "torch_dtype": "bfloat16",
45
+ "transformers_version": "4.51.3",
 
46
  "unsloth_fixed": true,
47
  "vision_config": {
48
  "attention_dropout": 0.0,
 
56
  "num_channels": 3,
57
  "num_hidden_layers": 27,
58
  "patch_size": 14,
59
+ "torch_dtype": "bfloat16",
60
  "vision_use_head": false
61
  }
62
  }
generation_config.json CHANGED
@@ -9,5 +9,5 @@
9
  "pad_token_id": 0,
10
  "top_k": 64,
11
  "top_p": 0.95,
12
- "transformers_version": "4.53.0"
13
  }
 
9
  "pad_token_id": 0,
10
  "top_k": 64,
11
  "top_p": 0.95,
12
+ "transformers_version": "4.51.3"
13
  }
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:933f60dbb408cdfcf043b7ba8ab8c9bcc667ffdc02f1914780f006858e8306f0
3
  size 4979901696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b21a1e5c49aa34a4636631697822c49b412d4e2d86dbdac94108dbcb06b7fe
3
  size 4979901696
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed6e65010b1d38cc53d7d8863ac5e42ecb1af44c439281d642fad6cc4e9ebfa2
3
  size 4931296448
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8bd176408089ccecdf38e2e980d3204935b308c10d394ee68079253c09f9d1e
3
  size 4931296448
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3f989d50c5e33b312da8b19c4d5f5dd20fec1109d20d5baf26da7bd9a2b01d2
3
  size 4931296512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d83dc4fe0301ad528b10d80672f72a9eedaead9337ff5f670b11094fc1c54a80
3
  size 4931296512
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dd1912a8e6dd105926cc824d453041c57ec82441a2ddaae1581e174ce7fe0d9
3
  size 4931296512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c611ea3e8b5980826259af07dd9fd4529bac10719118a3009c4cc7ef4c3881f2
3
  size 4931296512
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d2577898949744ac81908a6ecf16825d35cd8a5570824341ec3cb8e369f947f
3
  size 4601000792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89ed8b73be376a9ea033ecd807e952714705c98ae6ebd8fd11d46d6e71951260
3
  size 4601000792