{ "metadata": { "ParamSize": 98, "ParamBytes": 2471628800.0, "BitsPerParam": 13.195367926733098 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "e105f9d02e6a8d7adfd8533855677f22" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "595ecf4cba27f5dcbaa7f61191faf7c7" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a146ea08e79066f263468617118d49af" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "101c9352bd052402fab1cd73019bf107" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "757e94d880a0af3916245d1abf2f0cc9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 20987904, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 8192 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12591104 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20979712 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20983808 } ], "md5sum": "eeec9cc2059dad7fc0e50baca35353dc" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2eebc52872cfaa8781cccb04af7078eb" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "17d7431f3e56f56b8a4bff1c4ba21b16" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "5c5d9c284f1135b5cc5ecfaf82125bdd" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4d10a0db7a7abd911eb62aaeca1d720b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4df532c18c7c8f280a7586515c561e99" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "18479cbf5a0a611332bf8ae0c71379e4" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b60fd61965b1e36396b6236d7a819319" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5fd85ab93fe4184608a942ff68c831d5" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "3ec4aae8310b92ef01c96a707c6e08a2" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "67d8010b80a68556d81649050284df74" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "cad57947b90650ef9d736f7a55790e9f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "865bb489dd7004e175ea85b1d5b5c0bf" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "562a9766f8aee5d6cae0658bd4004b51" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "317da7cb6758a90a6c45d597e278e5d1" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "b0aba64c8040950395c4bc49ca48e2ee" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1f766513ff7da0b5c561972774b2a224" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "70f847da51c9aec779ef299f8077afb4" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "fb7030c87b00a6ef038ede6c02f03edf" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "458a2208b2dc94679a933ee33ea7b734" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "807a9c5343769339cb9624ae75ed216a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "471586eb83fe65def93831b3931b1ace" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7e87a68bf8426d2efb34586116b62903" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0f07125504c8a77b1cc9f36cdb0b8022" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "b440a26f6eb1c346fd7e88f33b41e5d1" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "908cdfdf40c642d1904e3e4ae2ba5a4e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e862a436e391ac68e461ef9667250725" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "3385e68f9b376dca90177e3f7ea2387f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "24f388cb71bf4acccf52493f3e6d73bf" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c128f1b4cdcbee7d1fe0c440478735fa" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "706adbde939028df26fa261a746815f8" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5b9437e4e072e1eb8587c150343955bf" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "940d4b77e813c39131c8b190a96afbdc" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "4022f15d7e8dafd74a927a7f86d60ae2" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9dc3e73d039d10e184bcb9928815f072" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0fe783a7fa4365570b61f40b1d02e992" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "bd97f53b3a9121a51945a66bf7f39231" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "36dec0ec71ad4339fe5e7383866e04da" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f8f919204af1a388d05201e6c2964dd2" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "2b2a1fb94c2923cfd8cd195bd4222fb8" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0d2cf5c5ba656a2c8fe434a8f95df88b" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d56e3e4e2fb83591eeeacad6fa10e9e9" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "abc498722ceb1de892496d16aece3ad4" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20975616, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 } ], "md5sum": "d8f4800e3c7c9f10a351e9995707086f" } ] }