{ "dd_meta_major_version": 1, "dd_meta_minor_version": 4, "state_table_updates": [ { "state_table_idx": 0, "update_func": 1, "update_arg": 1 } ], "op_list": [ { "name": "MatMulNBits_2_0", "type": "MladfMatMul", "in_args": [ "/model/layers.0/input_layernorm/output_0.out5_4_0" ], "const_args": [ "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.0.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.0/input_layernorm/output_0.out5_4_0" ], "const_args": [ "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.0.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "3", "1" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.0/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0", "past_key_values.0.key", "past_key_values.0.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0", "present.0.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "0", "0", "2", "0", "1", "1", "6", "0", "2", "0" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.0.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0" ], "const_args": [ "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_0", "type": "FlatRMSAdd", "in_args": [ "/model/embed_tokens/Gather/output_0.out4_0", "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1" ], "const_args": [ "model.layers.0.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.0/post_attention_layernorm/output_3.out4_0", "/model/layers.0/post_attention_layernorm/output_0.out4_0" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_0", "type": "FlatMLP", "in_args": [ "/model/layers.0/post_attention_layernorm/output_0.out4_0" ], "const_args": [ "model.layers.0.mlp.gate_proj.MatMulNBits.qweight", "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.0.mlp.up_proj.MatMulNBits.qweight", "model.layers.0.mlp.up_proj.MatMulNBits.scales.f", "model.layers.0.mlp.up_proj.MatMulNBits.qzeros", "model.layers.0.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.0/mlp/Mul/output_0.out3_0" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.0.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.0/mlp/Mul/output_0.out3_0" ], "const_args": [ "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_1", "type": "FlatRMSAdd", "in_args": [ "/model/layers.0/post_attention_layernorm/output_3.out4_0", "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2" ], "const_args": [ "model.layers.1.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.1/input_layernorm/output_3.out4_1", "/model/layers.1/input_layernorm/output_0.out4_1" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_1", "type": "MladfMatMul", "in_args": [ "/model/layers.1/input_layernorm/output_0.out4_1" ], "const_args": [ "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.1.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.1/input_layernorm/output_0.out4_1" ], "const_args": [ "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.1.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "7", "3" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.1/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3", "past_key_values.1.key", "past_key_values.1.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1", "present.1.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "4", "2", "2", "0", "5", "3", "6", "0", "6", "2" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.1.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1" ], "const_args": [ "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_2", "type": "FlatRMSAdd", "in_args": [ "/model/layers.1/input_layernorm/output_3.out4_1", "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4" ], "const_args": [ "model.layers.1.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.1/post_attention_layernorm/output_3.out4_2", "/model/layers.1/post_attention_layernorm/output_0.out4_2" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_1", "type": "FlatMLP", "in_args": [ "/model/layers.1/post_attention_layernorm/output_0.out4_2" ], "const_args": [ "model.layers.1.mlp.gate_proj.MatMulNBits.qweight", "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.1.mlp.up_proj.MatMulNBits.qweight", "model.layers.1.mlp.up_proj.MatMulNBits.scales.f", "model.layers.1.mlp.up_proj.MatMulNBits.qzeros", "model.layers.1.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.1/mlp/Mul/output_0.out3_1" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.1.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.1/mlp/Mul/output_0.out3_1" ], "const_args": [ "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_3", "type": "FlatRMSAdd", "in_args": [ "/model/layers.1/post_attention_layernorm/output_3.out4_2", "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5" ], "const_args": [ "model.layers.2.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.2/input_layernorm/output_3.out4_3", "/model/layers.2/input_layernorm/output_0.out4_3" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_2", "type": "MladfMatMul", "in_args": [ "/model/layers.2/input_layernorm/output_0.out4_3" ], "const_args": [ "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.2.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.2/input_layernorm/output_0.out4_3" ], "const_args": [ "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.2.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "11", "5" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.2/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6", "past_key_values.2.key", "past_key_values.2.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2", "present.2.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "8", "4", "2", "0", "9", "5", "6", "0", "10", "4" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.2.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2" ], "const_args": [ "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_4", "type": "FlatRMSAdd", "in_args": [ "/model/layers.2/input_layernorm/output_3.out4_3", "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7" ], "const_args": [ "model.layers.2.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.2/post_attention_layernorm/output_3.out4_4", "/model/layers.2/post_attention_layernorm/output_0.out4_4" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_2", "type": "FlatMLP", "in_args": [ "/model/layers.2/post_attention_layernorm/output_0.out4_4" ], "const_args": [ "model.layers.2.mlp.gate_proj.MatMulNBits.qweight", "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.2.mlp.up_proj.MatMulNBits.qweight", "model.layers.2.mlp.up_proj.MatMulNBits.scales.f", "model.layers.2.mlp.up_proj.MatMulNBits.qzeros", "model.layers.2.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.2/mlp/Mul/output_0.out3_2" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.2.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.2/mlp/Mul/output_0.out3_2" ], "const_args": [ "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_5", "type": "FlatRMSAdd", "in_args": [ "/model/layers.2/post_attention_layernorm/output_3.out4_4", "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8" ], "const_args": [ "model.layers.3.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.3/input_layernorm/output_3.out4_5", "/model/layers.3/input_layernorm/output_0.out4_5" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_3", "type": "MladfMatMul", "in_args": [ "/model/layers.3/input_layernorm/output_0.out4_5" ], "const_args": [ "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.3.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.3/input_layernorm/output_0.out4_5" ], "const_args": [ "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.3.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "15", "7" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.3/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9", "past_key_values.3.key", "past_key_values.3.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3", "present.3.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "12", "6", "2", "0", "13", "7", "6", "0", "14", "6" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.3.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3" ], "const_args": [ "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_6", "type": "FlatRMSAdd", "in_args": [ "/model/layers.3/input_layernorm/output_3.out4_5", "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10" ], "const_args": [ "model.layers.3.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.3/post_attention_layernorm/output_3.out4_6", "/model/layers.3/post_attention_layernorm/output_0.out4_6" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_3", "type": "FlatMLP", "in_args": [ "/model/layers.3/post_attention_layernorm/output_0.out4_6" ], "const_args": [ "model.layers.3.mlp.gate_proj.MatMulNBits.qweight", "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.3.mlp.up_proj.MatMulNBits.qweight", "model.layers.3.mlp.up_proj.MatMulNBits.scales.f", "model.layers.3.mlp.up_proj.MatMulNBits.qzeros", "model.layers.3.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.3/mlp/Mul/output_0.out3_3" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.3.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.3/mlp/Mul/output_0.out3_3" ], "const_args": [ "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_7", "type": "FlatRMSAdd", "in_args": [ "/model/layers.3/post_attention_layernorm/output_3.out4_6", "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11" ], "const_args": [ "model.layers.4.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.4/input_layernorm/output_3.out4_7", "/model/layers.4/input_layernorm/output_0.out4_7" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_4", "type": "MladfMatMul", "in_args": [ "/model/layers.4/input_layernorm/output_0.out4_7" ], "const_args": [ "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.4.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.4/input_layernorm/output_0.out4_7" ], "const_args": [ "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.4.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "19", "9" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.4/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12", "past_key_values.4.key", "past_key_values.4.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4", "present.4.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "16", "8", "2", "0", "17", "9", "6", "0", "18", "8" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.4.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4" ], "const_args": [ "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_8", "type": "FlatRMSAdd", "in_args": [ "/model/layers.4/input_layernorm/output_3.out4_7", "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13" ], "const_args": [ "model.layers.4.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.4/post_attention_layernorm/output_3.out4_8", "/model/layers.4/post_attention_layernorm/output_0.out4_8" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_4", "type": "FlatMLP", "in_args": [ "/model/layers.4/post_attention_layernorm/output_0.out4_8" ], "const_args": [ "model.layers.4.mlp.gate_proj.MatMulNBits.qweight", "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.4.mlp.up_proj.MatMulNBits.qweight", "model.layers.4.mlp.up_proj.MatMulNBits.scales.f", "model.layers.4.mlp.up_proj.MatMulNBits.qzeros", "model.layers.4.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.4/mlp/Mul/output_0.out3_4" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.4.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.4/mlp/Mul/output_0.out3_4" ], "const_args": [ "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_9", "type": "FlatRMSAdd", "in_args": [ "/model/layers.4/post_attention_layernorm/output_3.out4_8", "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14" ], "const_args": [ "model.layers.5.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.5/input_layernorm/output_3.out4_9", "/model/layers.5/input_layernorm/output_0.out4_9" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_5", "type": "MladfMatMul", "in_args": [ "/model/layers.5/input_layernorm/output_0.out4_9" ], "const_args": [ "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.5.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.5/input_layernorm/output_0.out4_9" ], "const_args": [ "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.5.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "23", "11" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.5/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15", "past_key_values.5.key", "past_key_values.5.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5", "present.5.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "20", "10", "2", "0", "21", "11", "6", "0", "22", "10" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.5.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5" ], "const_args": [ "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_10", "type": "FlatRMSAdd", "in_args": [ "/model/layers.5/input_layernorm/output_3.out4_9", "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16" ], "const_args": [ "model.layers.5.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.5/post_attention_layernorm/output_3.out4_10", "/model/layers.5/post_attention_layernorm/output_0.out4_10" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_5", "type": "FlatMLP", "in_args": [ "/model/layers.5/post_attention_layernorm/output_0.out4_10" ], "const_args": [ "model.layers.5.mlp.gate_proj.MatMulNBits.qweight", "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.5.mlp.up_proj.MatMulNBits.qweight", "model.layers.5.mlp.up_proj.MatMulNBits.scales.f", "model.layers.5.mlp.up_proj.MatMulNBits.qzeros", "model.layers.5.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.5/mlp/Mul/output_0.out3_5" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.5.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.5/mlp/Mul/output_0.out3_5" ], "const_args": [ "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_11", "type": "FlatRMSAdd", "in_args": [ "/model/layers.5/post_attention_layernorm/output_3.out4_10", "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17" ], "const_args": [ "model.layers.6.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.6/input_layernorm/output_3.out4_11", "/model/layers.6/input_layernorm/output_0.out4_11" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_6", "type": "MladfMatMul", "in_args": [ "/model/layers.6/input_layernorm/output_0.out4_11" ], "const_args": [ "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.6.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.6/input_layernorm/output_0.out4_11" ], "const_args": [ "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.6.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "27", "13" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.6/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18", "past_key_values.6.key", "past_key_values.6.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6", "present.6.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "24", "12", "2", "0", "25", "13", "6", "0", "26", "12" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.6.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6" ], "const_args": [ "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_12", "type": "FlatRMSAdd", "in_args": [ "/model/layers.6/input_layernorm/output_3.out4_11", "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19" ], "const_args": [ "model.layers.6.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.6/post_attention_layernorm/output_3.out4_12", "/model/layers.6/post_attention_layernorm/output_0.out4_12" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_6", "type": "FlatMLP", "in_args": [ "/model/layers.6/post_attention_layernorm/output_0.out4_12" ], "const_args": [ "model.layers.6.mlp.gate_proj.MatMulNBits.qweight", "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.6.mlp.up_proj.MatMulNBits.qweight", "model.layers.6.mlp.up_proj.MatMulNBits.scales.f", "model.layers.6.mlp.up_proj.MatMulNBits.qzeros", "model.layers.6.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.6/mlp/Mul/output_0.out3_6" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.6.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.6/mlp/Mul/output_0.out3_6" ], "const_args": [ "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_13", "type": "FlatRMSAdd", "in_args": [ "/model/layers.6/post_attention_layernorm/output_3.out4_12", "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20" ], "const_args": [ "model.layers.7.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.7/input_layernorm/output_3.out4_13", "/model/layers.7/input_layernorm/output_0.out4_13" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_7", "type": "MladfMatMul", "in_args": [ "/model/layers.7/input_layernorm/output_0.out4_13" ], "const_args": [ "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.7.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.7/input_layernorm/output_0.out4_13" ], "const_args": [ "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.7.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "31", "15" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.7/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21", "past_key_values.7.key", "past_key_values.7.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7", "present.7.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "28", "14", "2", "0", "29", "15", "6", "0", "30", "14" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.7.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7" ], "const_args": [ "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_14", "type": "FlatRMSAdd", "in_args": [ "/model/layers.7/input_layernorm/output_3.out4_13", "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22" ], "const_args": [ "model.layers.7.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.7/post_attention_layernorm/output_3.out4_14", "/model/layers.7/post_attention_layernorm/output_0.out4_14" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_7", "type": "FlatMLP", "in_args": [ "/model/layers.7/post_attention_layernorm/output_0.out4_14" ], "const_args": [ "model.layers.7.mlp.gate_proj.MatMulNBits.qweight", "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.7.mlp.up_proj.MatMulNBits.qweight", "model.layers.7.mlp.up_proj.MatMulNBits.scales.f", "model.layers.7.mlp.up_proj.MatMulNBits.qzeros", "model.layers.7.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.7/mlp/Mul/output_0.out3_7" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.7.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.7/mlp/Mul/output_0.out3_7" ], "const_args": [ "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_15", "type": "FlatRMSAdd", "in_args": [ "/model/layers.7/post_attention_layernorm/output_3.out4_14", "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23" ], "const_args": [ "model.layers.8.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.8/input_layernorm/output_3.out4_15", "/model/layers.8/input_layernorm/output_0.out4_15" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_8", "type": "MladfMatMul", "in_args": [ "/model/layers.8/input_layernorm/output_0.out4_15" ], "const_args": [ "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.8.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.8/input_layernorm/output_0.out4_15" ], "const_args": [ "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.8.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "35", "17" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.8/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24", "past_key_values.8.key", "past_key_values.8.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8", "present.8.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "32", "16", "2", "0", "33", "17", "6", "0", "34", "16" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.8.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8" ], "const_args": [ "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_16", "type": "FlatRMSAdd", "in_args": [ "/model/layers.8/input_layernorm/output_3.out4_15", "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25" ], "const_args": [ "model.layers.8.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.8/post_attention_layernorm/output_3.out4_16", "/model/layers.8/post_attention_layernorm/output_0.out4_16" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_8", "type": "FlatMLP", "in_args": [ "/model/layers.8/post_attention_layernorm/output_0.out4_16" ], "const_args": [ "model.layers.8.mlp.gate_proj.MatMulNBits.qweight", "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.8.mlp.up_proj.MatMulNBits.qweight", "model.layers.8.mlp.up_proj.MatMulNBits.scales.f", "model.layers.8.mlp.up_proj.MatMulNBits.qzeros", "model.layers.8.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.8/mlp/Mul/output_0.out3_8" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.8.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.8/mlp/Mul/output_0.out3_8" ], "const_args": [ "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_17", "type": "FlatRMSAdd", "in_args": [ "/model/layers.8/post_attention_layernorm/output_3.out4_16", "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26" ], "const_args": [ "model.layers.9.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.9/input_layernorm/output_3.out4_17", "/model/layers.9/input_layernorm/output_0.out4_17" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_9", "type": "MladfMatMul", "in_args": [ "/model/layers.9/input_layernorm/output_0.out4_17" ], "const_args": [ "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.9.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.9/input_layernorm/output_0.out4_17" ], "const_args": [ "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.9.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "39", "19" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.9/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27", "past_key_values.9.key", "past_key_values.9.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9", "present.9.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "36", "18", "2", "0", "37", "19", "6", "0", "38", "18" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.9.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9" ], "const_args": [ "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_18", "type": "FlatRMSAdd", "in_args": [ "/model/layers.9/input_layernorm/output_3.out4_17", "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28" ], "const_args": [ "model.layers.9.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.9/post_attention_layernorm/output_3.out4_18", "/model/layers.9/post_attention_layernorm/output_0.out4_18" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_9", "type": "FlatMLP", "in_args": [ "/model/layers.9/post_attention_layernorm/output_0.out4_18" ], "const_args": [ "model.layers.9.mlp.gate_proj.MatMulNBits.qweight", "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.9.mlp.up_proj.MatMulNBits.qweight", "model.layers.9.mlp.up_proj.MatMulNBits.scales.f", "model.layers.9.mlp.up_proj.MatMulNBits.qzeros", "model.layers.9.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.9/mlp/Mul/output_0.out3_9" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.9.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.9/mlp/Mul/output_0.out3_9" ], "const_args": [ "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_19", "type": "FlatRMSAdd", "in_args": [ "/model/layers.9/post_attention_layernorm/output_3.out4_18", "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29" ], "const_args": [ "model.layers.10.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.10/input_layernorm/output_3.out4_19", "/model/layers.10/input_layernorm/output_0.out4_19" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_10", "type": "MladfMatMul", "in_args": [ "/model/layers.10/input_layernorm/output_0.out4_19" ], "const_args": [ "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.10.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.10/input_layernorm/output_0.out4_19" ], "const_args": [ "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.10.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "43", "21" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.10/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30", "past_key_values.10.key", "past_key_values.10.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10", "present.10.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "40", "20", "2", "0", "41", "21", "6", "0", "42", "20" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.10.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10" ], "const_args": [ "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_20", "type": "FlatRMSAdd", "in_args": [ "/model/layers.10/input_layernorm/output_3.out4_19", "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31" ], "const_args": [ "model.layers.10.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.10/post_attention_layernorm/output_3.out4_20", "/model/layers.10/post_attention_layernorm/output_0.out4_20" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_10", "type": "FlatMLP", "in_args": [ "/model/layers.10/post_attention_layernorm/output_0.out4_20" ], "const_args": [ "model.layers.10.mlp.gate_proj.MatMulNBits.qweight", "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.10.mlp.up_proj.MatMulNBits.qweight", "model.layers.10.mlp.up_proj.MatMulNBits.scales.f", "model.layers.10.mlp.up_proj.MatMulNBits.qzeros", "model.layers.10.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.10/mlp/Mul/output_0.out3_10" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.10.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.10/mlp/Mul/output_0.out3_10" ], "const_args": [ "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_21", "type": "FlatRMSAdd", "in_args": [ "/model/layers.10/post_attention_layernorm/output_3.out4_20", "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32" ], "const_args": [ "model.layers.11.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.11/input_layernorm/output_3.out4_21", "/model/layers.11/input_layernorm/output_0.out4_21" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_11", "type": "MladfMatMul", "in_args": [ "/model/layers.11/input_layernorm/output_0.out4_21" ], "const_args": [ "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.11.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.11/input_layernorm/output_0.out4_21" ], "const_args": [ "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.11.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "47", "23" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.11/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33", "past_key_values.11.key", "past_key_values.11.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11", "present.11.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "44", "22", "2", "0", "45", "23", "6", "0", "46", "22" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.11.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11" ], "const_args": [ "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_22", "type": "FlatRMSAdd", "in_args": [ "/model/layers.11/input_layernorm/output_3.out4_21", "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34" ], "const_args": [ "model.layers.11.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.11/post_attention_layernorm/output_3.out4_22", "/model/layers.11/post_attention_layernorm/output_0.out4_22" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_11", "type": "FlatMLP", "in_args": [ "/model/layers.11/post_attention_layernorm/output_0.out4_22" ], "const_args": [ "model.layers.11.mlp.gate_proj.MatMulNBits.qweight", "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.11.mlp.up_proj.MatMulNBits.qweight", "model.layers.11.mlp.up_proj.MatMulNBits.scales.f", "model.layers.11.mlp.up_proj.MatMulNBits.qzeros", "model.layers.11.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.11/mlp/Mul/output_0.out3_11" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.11.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.11/mlp/Mul/output_0.out3_11" ], "const_args": [ "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_23", "type": "FlatRMSAdd", "in_args": [ "/model/layers.11/post_attention_layernorm/output_3.out4_22", "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35" ], "const_args": [ "model.layers.12.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.12/input_layernorm/output_3.out4_23", "/model/layers.12/input_layernorm/output_0.out4_23" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_12", "type": "MladfMatMul", "in_args": [ "/model/layers.12/input_layernorm/output_0.out4_23" ], "const_args": [ "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.12.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.12/input_layernorm/output_0.out4_23" ], "const_args": [ "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.12.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "51", "25" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.12/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36", "past_key_values.12.key", "past_key_values.12.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12", "present.12.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "48", "24", "2", "0", "49", "25", "6", "0", "50", "24" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.12.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12" ], "const_args": [ "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_24", "type": "FlatRMSAdd", "in_args": [ "/model/layers.12/input_layernorm/output_3.out4_23", "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37" ], "const_args": [ "model.layers.12.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.12/post_attention_layernorm/output_3.out4_24", "/model/layers.12/post_attention_layernorm/output_0.out4_24" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_12", "type": "FlatMLP", "in_args": [ "/model/layers.12/post_attention_layernorm/output_0.out4_24" ], "const_args": [ "model.layers.12.mlp.gate_proj.MatMulNBits.qweight", "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.12.mlp.up_proj.MatMulNBits.qweight", "model.layers.12.mlp.up_proj.MatMulNBits.scales.f", "model.layers.12.mlp.up_proj.MatMulNBits.qzeros", "model.layers.12.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.12/mlp/Mul/output_0.out3_12" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.12.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.12/mlp/Mul/output_0.out3_12" ], "const_args": [ "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_25", "type": "FlatRMSAdd", "in_args": [ "/model/layers.12/post_attention_layernorm/output_3.out4_24", "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38" ], "const_args": [ "model.layers.13.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.13/input_layernorm/output_3.out4_25", "/model/layers.13/input_layernorm/output_0.out4_25" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_13", "type": "MladfMatMul", "in_args": [ "/model/layers.13/input_layernorm/output_0.out4_25" ], "const_args": [ "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.13.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.13/input_layernorm/output_0.out4_25" ], "const_args": [ "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.13.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "55", "27" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.13/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39", "past_key_values.13.key", "past_key_values.13.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13", "present.13.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "52", "26", "2", "0", "53", "27", "6", "0", "54", "26" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.13.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13" ], "const_args": [ "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_26", "type": "FlatRMSAdd", "in_args": [ "/model/layers.13/input_layernorm/output_3.out4_25", "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40" ], "const_args": [ "model.layers.13.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.13/post_attention_layernorm/output_3.out4_26", "/model/layers.13/post_attention_layernorm/output_0.out4_26" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_13", "type": "FlatMLP", "in_args": [ "/model/layers.13/post_attention_layernorm/output_0.out4_26" ], "const_args": [ "model.layers.13.mlp.gate_proj.MatMulNBits.qweight", "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.13.mlp.up_proj.MatMulNBits.qweight", "model.layers.13.mlp.up_proj.MatMulNBits.scales.f", "model.layers.13.mlp.up_proj.MatMulNBits.qzeros", "model.layers.13.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.13/mlp/Mul/output_0.out3_13" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.13.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.13/mlp/Mul/output_0.out3_13" ], "const_args": [ "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_27", "type": "FlatRMSAdd", "in_args": [ "/model/layers.13/post_attention_layernorm/output_3.out4_26", "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41" ], "const_args": [ "model.layers.14.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.14/input_layernorm/output_3.out4_27", "/model/layers.14/input_layernorm/output_0.out4_27" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_14", "type": "MladfMatMul", "in_args": [ "/model/layers.14/input_layernorm/output_0.out4_27" ], "const_args": [ "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.14.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.14/input_layernorm/output_0.out4_27" ], "const_args": [ "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.14.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "59", "29" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.14/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42", "past_key_values.14.key", "past_key_values.14.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14", "present.14.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "56", "28", "2", "0", "57", "29", "6", "0", "58", "28" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.14.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14" ], "const_args": [ "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_28", "type": "FlatRMSAdd", "in_args": [ "/model/layers.14/input_layernorm/output_3.out4_27", "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43" ], "const_args": [ "model.layers.14.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.14/post_attention_layernorm/output_3.out4_28", "/model/layers.14/post_attention_layernorm/output_0.out4_28" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_14", "type": "FlatMLP", "in_args": [ "/model/layers.14/post_attention_layernorm/output_0.out4_28" ], "const_args": [ "model.layers.14.mlp.gate_proj.MatMulNBits.qweight", "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.14.mlp.up_proj.MatMulNBits.qweight", "model.layers.14.mlp.up_proj.MatMulNBits.scales.f", "model.layers.14.mlp.up_proj.MatMulNBits.qzeros", "model.layers.14.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.14/mlp/Mul/output_0.out3_14" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.14.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.14/mlp/Mul/output_0.out3_14" ], "const_args": [ "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_29", "type": "FlatRMSAdd", "in_args": [ "/model/layers.14/post_attention_layernorm/output_3.out4_28", "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44" ], "const_args": [ "model.layers.15.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.15/input_layernorm/output_3.out4_29", "/model/layers.15/input_layernorm/output_0.out4_29" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_15", "type": "MladfMatMul", "in_args": [ "/model/layers.15/input_layernorm/output_0.out4_29" ], "const_args": [ "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.15.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.15/input_layernorm/output_0.out4_29" ], "const_args": [ "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.15.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "63", "31" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.15/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45", "past_key_values.15.key", "past_key_values.15.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15", "present.15.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "60", "30", "2", "0", "61", "31", "6", "0", "62", "30" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.15.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15" ], "const_args": [ "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_30", "type": "FlatRMSAdd", "in_args": [ "/model/layers.15/input_layernorm/output_3.out4_29", "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46" ], "const_args": [ "model.layers.15.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.15/post_attention_layernorm/output_3.out4_30", "/model/layers.15/post_attention_layernorm/output_0.out4_30" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_15", "type": "FlatMLP", "in_args": [ "/model/layers.15/post_attention_layernorm/output_0.out4_30" ], "const_args": [ "model.layers.15.mlp.gate_proj.MatMulNBits.qweight", "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.15.mlp.up_proj.MatMulNBits.qweight", "model.layers.15.mlp.up_proj.MatMulNBits.scales.f", "model.layers.15.mlp.up_proj.MatMulNBits.qzeros", "model.layers.15.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.15/mlp/Mul/output_0.out3_15" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.15.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.15/mlp/Mul/output_0.out3_15" ], "const_args": [ "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_31", "type": "FlatRMSAdd", "in_args": [ "/model/layers.15/post_attention_layernorm/output_3.out4_30", "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47" ], "const_args": [ "model.layers.16.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.16/input_layernorm/output_3.out4_31", "/model/layers.16/input_layernorm/output_0.out4_31" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_16", "type": "MladfMatMul", "in_args": [ "/model/layers.16/input_layernorm/output_0.out4_31" ], "const_args": [ "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.16.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.16/input_layernorm/output_0.out4_31" ], "const_args": [ "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.16.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "67", "33" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.16/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48", "past_key_values.16.key", "past_key_values.16.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16", "present.16.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "64", "32", "2", "0", "65", "33", "6", "0", "66", "32" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.16.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16" ], "const_args": [ "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_32", "type": "FlatRMSAdd", "in_args": [ "/model/layers.16/input_layernorm/output_3.out4_31", "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49" ], "const_args": [ "model.layers.16.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.16/post_attention_layernorm/output_3.out4_32", "/model/layers.16/post_attention_layernorm/output_0.out4_32" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_16", "type": "FlatMLP", "in_args": [ "/model/layers.16/post_attention_layernorm/output_0.out4_32" ], "const_args": [ "model.layers.16.mlp.gate_proj.MatMulNBits.qweight", "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.16.mlp.up_proj.MatMulNBits.qweight", "model.layers.16.mlp.up_proj.MatMulNBits.scales.f", "model.layers.16.mlp.up_proj.MatMulNBits.qzeros", "model.layers.16.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.16/mlp/Mul/output_0.out3_16" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.16.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.16/mlp/Mul/output_0.out3_16" ], "const_args": [ "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_33", "type": "FlatRMSAdd", "in_args": [ "/model/layers.16/post_attention_layernorm/output_3.out4_32", "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50" ], "const_args": [ "model.layers.17.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.17/input_layernorm/output_3.out4_33", "/model/layers.17/input_layernorm/output_0.out4_33" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_17", "type": "MladfMatMul", "in_args": [ "/model/layers.17/input_layernorm/output_0.out4_33" ], "const_args": [ "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.17.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.17/input_layernorm/output_0.out4_33" ], "const_args": [ "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.17.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "71", "35" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.17/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51", "past_key_values.17.key", "past_key_values.17.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17", "present.17.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "68", "34", "2", "0", "69", "35", "6", "0", "70", "34" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.17.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17" ], "const_args": [ "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_34", "type": "FlatRMSAdd", "in_args": [ "/model/layers.17/input_layernorm/output_3.out4_33", "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52" ], "const_args": [ "model.layers.17.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.17/post_attention_layernorm/output_3.out4_34", "/model/layers.17/post_attention_layernorm/output_0.out4_34" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_17", "type": "FlatMLP", "in_args": [ "/model/layers.17/post_attention_layernorm/output_0.out4_34" ], "const_args": [ "model.layers.17.mlp.gate_proj.MatMulNBits.qweight", "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.17.mlp.up_proj.MatMulNBits.qweight", "model.layers.17.mlp.up_proj.MatMulNBits.scales.f", "model.layers.17.mlp.up_proj.MatMulNBits.qzeros", "model.layers.17.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.17/mlp/Mul/output_0.out3_17" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.17.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.17/mlp/Mul/output_0.out3_17" ], "const_args": [ "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_35", "type": "FlatRMSAdd", "in_args": [ "/model/layers.17/post_attention_layernorm/output_3.out4_34", "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53" ], "const_args": [ "model.layers.18.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.18/input_layernorm/output_3.out4_35", "/model/layers.18/input_layernorm/output_0.out4_35" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_18", "type": "MladfMatMul", "in_args": [ "/model/layers.18/input_layernorm/output_0.out4_35" ], "const_args": [ "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.18.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.18/input_layernorm/output_0.out4_35" ], "const_args": [ "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.18.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "75", "37" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.18/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54", "past_key_values.18.key", "past_key_values.18.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18", "present.18.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "72", "36", "2", "0", "73", "37", "6", "0", "74", "36" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.18.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18" ], "const_args": [ "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_36", "type": "FlatRMSAdd", "in_args": [ "/model/layers.18/input_layernorm/output_3.out4_35", "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55" ], "const_args": [ "model.layers.18.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.18/post_attention_layernorm/output_3.out4_36", "/model/layers.18/post_attention_layernorm/output_0.out4_36" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_18", "type": "FlatMLP", "in_args": [ "/model/layers.18/post_attention_layernorm/output_0.out4_36" ], "const_args": [ "model.layers.18.mlp.gate_proj.MatMulNBits.qweight", "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.18.mlp.up_proj.MatMulNBits.qweight", "model.layers.18.mlp.up_proj.MatMulNBits.scales.f", "model.layers.18.mlp.up_proj.MatMulNBits.qzeros", "model.layers.18.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.18/mlp/Mul/output_0.out3_18" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.18.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.18/mlp/Mul/output_0.out3_18" ], "const_args": [ "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_37", "type": "FlatRMSAdd", "in_args": [ "/model/layers.18/post_attention_layernorm/output_3.out4_36", "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56" ], "const_args": [ "model.layers.19.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.19/input_layernorm/output_3.out4_37", "/model/layers.19/input_layernorm/output_0.out4_37" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_19", "type": "MladfMatMul", "in_args": [ "/model/layers.19/input_layernorm/output_0.out4_37" ], "const_args": [ "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.19.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.19/input_layernorm/output_0.out4_37" ], "const_args": [ "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.19.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "79", "39" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.19/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57", "past_key_values.19.key", "past_key_values.19.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19", "present.19.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "76", "38", "2", "0", "77", "39", "6", "0", "78", "38" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.19.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19" ], "const_args": [ "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_38", "type": "FlatRMSAdd", "in_args": [ "/model/layers.19/input_layernorm/output_3.out4_37", "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58" ], "const_args": [ "model.layers.19.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.19/post_attention_layernorm/output_3.out4_38", "/model/layers.19/post_attention_layernorm/output_0.out4_38" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_19", "type": "FlatMLP", "in_args": [ "/model/layers.19/post_attention_layernorm/output_0.out4_38" ], "const_args": [ "model.layers.19.mlp.gate_proj.MatMulNBits.qweight", "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.19.mlp.up_proj.MatMulNBits.qweight", "model.layers.19.mlp.up_proj.MatMulNBits.scales.f", "model.layers.19.mlp.up_proj.MatMulNBits.qzeros", "model.layers.19.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.19/mlp/Mul/output_0.out3_19" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.19.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.19/mlp/Mul/output_0.out3_19" ], "const_args": [ "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_39", "type": "FlatRMSAdd", "in_args": [ "/model/layers.19/post_attention_layernorm/output_3.out4_38", "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59" ], "const_args": [ "model.layers.20.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.20/input_layernorm/output_3.out4_39", "/model/layers.20/input_layernorm/output_0.out4_39" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_20", "type": "MladfMatMul", "in_args": [ "/model/layers.20/input_layernorm/output_0.out4_39" ], "const_args": [ "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.20.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.20/input_layernorm/output_0.out4_39" ], "const_args": [ "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.20.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "83", "41" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.20/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60", "past_key_values.20.key", "past_key_values.20.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20", "present.20.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "80", "40", "2", "0", "81", "41", "6", "0", "82", "40" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.20.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20" ], "const_args": [ "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_40", "type": "FlatRMSAdd", "in_args": [ "/model/layers.20/input_layernorm/output_3.out4_39", "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61" ], "const_args": [ "model.layers.20.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.20/post_attention_layernorm/output_3.out4_40", "/model/layers.20/post_attention_layernorm/output_0.out4_40" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_20", "type": "FlatMLP", "in_args": [ "/model/layers.20/post_attention_layernorm/output_0.out4_40" ], "const_args": [ "model.layers.20.mlp.gate_proj.MatMulNBits.qweight", "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.20.mlp.up_proj.MatMulNBits.qweight", "model.layers.20.mlp.up_proj.MatMulNBits.scales.f", "model.layers.20.mlp.up_proj.MatMulNBits.qzeros", "model.layers.20.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.20/mlp/Mul/output_0.out3_20" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.20.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.20/mlp/Mul/output_0.out3_20" ], "const_args": [ "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_41", "type": "FlatRMSAdd", "in_args": [ "/model/layers.20/post_attention_layernorm/output_3.out4_40", "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62" ], "const_args": [ "model.layers.21.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.21/input_layernorm/output_3.out4_41", "/model/layers.21/input_layernorm/output_0.out4_41" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_21", "type": "MladfMatMul", "in_args": [ "/model/layers.21/input_layernorm/output_0.out4_41" ], "const_args": [ "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.21.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.21/input_layernorm/output_0.out4_41" ], "const_args": [ "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.21.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "87", "43" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.21/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63", "past_key_values.21.key", "past_key_values.21.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21", "present.21.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "84", "42", "2", "0", "85", "43", "6", "0", "86", "42" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.21.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21" ], "const_args": [ "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_42", "type": "FlatRMSAdd", "in_args": [ "/model/layers.21/input_layernorm/output_3.out4_41", "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64" ], "const_args": [ "model.layers.21.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.21/post_attention_layernorm/output_3.out4_42", "/model/layers.21/post_attention_layernorm/output_0.out4_42" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_21", "type": "FlatMLP", "in_args": [ "/model/layers.21/post_attention_layernorm/output_0.out4_42" ], "const_args": [ "model.layers.21.mlp.gate_proj.MatMulNBits.qweight", "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.21.mlp.up_proj.MatMulNBits.qweight", "model.layers.21.mlp.up_proj.MatMulNBits.scales.f", "model.layers.21.mlp.up_proj.MatMulNBits.qzeros", "model.layers.21.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.21/mlp/Mul/output_0.out3_21" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.21.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.21/mlp/Mul/output_0.out3_21" ], "const_args": [ "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_43", "type": "FlatRMSAdd", "in_args": [ "/model/layers.21/post_attention_layernorm/output_3.out4_42", "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65" ], "const_args": [ "model.layers.22.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.22/input_layernorm/output_3.out4_43", "/model/layers.22/input_layernorm/output_0.out4_43" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_22", "type": "MladfMatMul", "in_args": [ "/model/layers.22/input_layernorm/output_0.out4_43" ], "const_args": [ "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.22.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.22/input_layernorm/output_0.out4_43" ], "const_args": [ "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.22.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "91", "45" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.22/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66", "past_key_values.22.key", "past_key_values.22.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22", "present.22.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "88", "44", "2", "0", "89", "45", "6", "0", "90", "44" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.22.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22" ], "const_args": [ "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_44", "type": "FlatRMSAdd", "in_args": [ "/model/layers.22/input_layernorm/output_3.out4_43", "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67" ], "const_args": [ "model.layers.22.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.22/post_attention_layernorm/output_3.out4_44", "/model/layers.22/post_attention_layernorm/output_0.out4_44" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_22", "type": "FlatMLP", "in_args": [ "/model/layers.22/post_attention_layernorm/output_0.out4_44" ], "const_args": [ "model.layers.22.mlp.gate_proj.MatMulNBits.qweight", "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.22.mlp.up_proj.MatMulNBits.qweight", "model.layers.22.mlp.up_proj.MatMulNBits.scales.f", "model.layers.22.mlp.up_proj.MatMulNBits.qzeros", "model.layers.22.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.22/mlp/Mul/output_0.out3_22" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.22.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.22/mlp/Mul/output_0.out3_22" ], "const_args": [ "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_45", "type": "FlatRMSAdd", "in_args": [ "/model/layers.22/post_attention_layernorm/output_3.out4_44", "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68" ], "const_args": [ "model.layers.23.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.23/input_layernorm/output_3.out4_45", "/model/layers.23/input_layernorm/output_0.out4_45" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_23", "type": "MladfMatMul", "in_args": [ "/model/layers.23/input_layernorm/output_0.out4_45" ], "const_args": [ "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.23.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.23/input_layernorm/output_0.out4_45" ], "const_args": [ "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.23.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "95", "47" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.23/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69", "past_key_values.23.key", "past_key_values.23.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23", "present.23.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "92", "46", "2", "0", "93", "47", "6", "0", "94", "46" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.23.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23" ], "const_args": [ "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_46", "type": "FlatRMSAdd", "in_args": [ "/model/layers.23/input_layernorm/output_3.out4_45", "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70" ], "const_args": [ "model.layers.23.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.23/post_attention_layernorm/output_3.out4_46", "/model/layers.23/post_attention_layernorm/output_0.out4_46" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_23", "type": "FlatMLP", "in_args": [ "/model/layers.23/post_attention_layernorm/output_0.out4_46" ], "const_args": [ "model.layers.23.mlp.gate_proj.MatMulNBits.qweight", "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.23.mlp.up_proj.MatMulNBits.qweight", "model.layers.23.mlp.up_proj.MatMulNBits.scales.f", "model.layers.23.mlp.up_proj.MatMulNBits.qzeros", "model.layers.23.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.23/mlp/Mul/output_0.out3_23" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.23.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.23/mlp/Mul/output_0.out3_23" ], "const_args": [ "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_47", "type": "FlatRMSAdd", "in_args": [ "/model/layers.23/post_attention_layernorm/output_3.out4_46", "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71" ], "const_args": [ "model.layers.24.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.24/input_layernorm/output_3.out4_47", "/model/layers.24/input_layernorm/output_0.out4_47" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_24", "type": "MladfMatMul", "in_args": [ "/model/layers.24/input_layernorm/output_0.out4_47" ], "const_args": [ "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.24.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.24/input_layernorm/output_0.out4_47" ], "const_args": [ "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.24.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "99", "49" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.24/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72", "past_key_values.24.key", "past_key_values.24.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24", "present.24.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "96", "48", "2", "0", "97", "49", "6", "0", "98", "48" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.24.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24" ], "const_args": [ "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_48", "type": "FlatRMSAdd", "in_args": [ "/model/layers.24/input_layernorm/output_3.out4_47", "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73" ], "const_args": [ "model.layers.24.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.24/post_attention_layernorm/output_3.out4_48", "/model/layers.24/post_attention_layernorm/output_0.out4_48" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_24", "type": "FlatMLP", "in_args": [ "/model/layers.24/post_attention_layernorm/output_0.out4_48" ], "const_args": [ "model.layers.24.mlp.gate_proj.MatMulNBits.qweight", "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.24.mlp.up_proj.MatMulNBits.qweight", "model.layers.24.mlp.up_proj.MatMulNBits.scales.f", "model.layers.24.mlp.up_proj.MatMulNBits.qzeros", "model.layers.24.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.24/mlp/Mul/output_0.out3_24" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.24.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.24/mlp/Mul/output_0.out3_24" ], "const_args": [ "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_49", "type": "FlatRMSAdd", "in_args": [ "/model/layers.24/post_attention_layernorm/output_3.out4_48", "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74" ], "const_args": [ "model.layers.25.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.25/input_layernorm/output_3.out4_49", "/model/layers.25/input_layernorm/output_0.out4_49" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_25", "type": "MladfMatMul", "in_args": [ "/model/layers.25/input_layernorm/output_0.out4_49" ], "const_args": [ "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.25.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.25/input_layernorm/output_0.out4_49" ], "const_args": [ "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.25.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "103", "51" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.25/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75", "past_key_values.25.key", "past_key_values.25.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25", "present.25.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "100", "50", "2", "0", "101", "51", "6", "0", "102", "50" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.25.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25" ], "const_args": [ "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_50", "type": "FlatRMSAdd", "in_args": [ "/model/layers.25/input_layernorm/output_3.out4_49", "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76" ], "const_args": [ "model.layers.25.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.25/post_attention_layernorm/output_3.out4_50", "/model/layers.25/post_attention_layernorm/output_0.out4_50" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_25", "type": "FlatMLP", "in_args": [ "/model/layers.25/post_attention_layernorm/output_0.out4_50" ], "const_args": [ "model.layers.25.mlp.gate_proj.MatMulNBits.qweight", "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.25.mlp.up_proj.MatMulNBits.qweight", "model.layers.25.mlp.up_proj.MatMulNBits.scales.f", "model.layers.25.mlp.up_proj.MatMulNBits.qzeros", "model.layers.25.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.25/mlp/Mul/output_0.out3_25" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.25.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.25/mlp/Mul/output_0.out3_25" ], "const_args": [ "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_51", "type": "FlatRMSAdd", "in_args": [ "/model/layers.25/post_attention_layernorm/output_3.out4_50", "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77" ], "const_args": [ "model.layers.26.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.26/input_layernorm/output_3.out4_51", "/model/layers.26/input_layernorm/output_0.out4_51" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_26", "type": "MladfMatMul", "in_args": [ "/model/layers.26/input_layernorm/output_0.out4_51" ], "const_args": [ "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.26.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.26/input_layernorm/output_0.out4_51" ], "const_args": [ "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.26.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "107", "53" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.26/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78", "past_key_values.26.key", "past_key_values.26.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26", "present.26.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "104", "52", "2", "0", "105", "53", "6", "0", "106", "52" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.26.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26" ], "const_args": [ "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_52", "type": "FlatRMSAdd", "in_args": [ "/model/layers.26/input_layernorm/output_3.out4_51", "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79" ], "const_args": [ "model.layers.26.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.26/post_attention_layernorm/output_3.out4_52", "/model/layers.26/post_attention_layernorm/output_0.out4_52" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_26", "type": "FlatMLP", "in_args": [ "/model/layers.26/post_attention_layernorm/output_0.out4_52" ], "const_args": [ "model.layers.26.mlp.gate_proj.MatMulNBits.qweight", "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.26.mlp.up_proj.MatMulNBits.qweight", "model.layers.26.mlp.up_proj.MatMulNBits.scales.f", "model.layers.26.mlp.up_proj.MatMulNBits.qzeros", "model.layers.26.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.26/mlp/Mul/output_0.out3_26" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.26.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.26/mlp/Mul/output_0.out3_26" ], "const_args": [ "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_53", "type": "FlatRMSAdd", "in_args": [ "/model/layers.26/post_attention_layernorm/output_3.out4_52", "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80" ], "const_args": [ "model.layers.27.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.27/input_layernorm/output_3.out4_53", "/model/layers.27/input_layernorm/output_0.out4_53" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_27", "type": "MladfMatMul", "in_args": [ "/model/layers.27/input_layernorm/output_0.out4_53" ], "const_args": [ "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.27.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.27/input_layernorm/output_0.out4_53" ], "const_args": [ "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.27.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "111", "55" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.27/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81", "past_key_values.27.key", "past_key_values.27.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27", "present.27.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "108", "54", "2", "0", "109", "55", "6", "0", "110", "54" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.27.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27" ], "const_args": [ "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_54", "type": "FlatRMSAdd", "in_args": [ "/model/layers.27/input_layernorm/output_3.out4_53", "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82" ], "const_args": [ "model.layers.27.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.27/post_attention_layernorm/output_3.out4_54", "/model/layers.27/post_attention_layernorm/output_0.out4_54" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_27", "type": "FlatMLP", "in_args": [ "/model/layers.27/post_attention_layernorm/output_0.out4_54" ], "const_args": [ "model.layers.27.mlp.gate_proj.MatMulNBits.qweight", "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.27.mlp.up_proj.MatMulNBits.qweight", "model.layers.27.mlp.up_proj.MatMulNBits.scales.f", "model.layers.27.mlp.up_proj.MatMulNBits.qzeros", "model.layers.27.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.27/mlp/Mul/output_0.out3_27" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.27.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.27/mlp/Mul/output_0.out3_27" ], "const_args": [ "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_55", "type": "FlatRMSAdd", "in_args": [ "/model/layers.27/post_attention_layernorm/output_3.out4_54", "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83" ], "const_args": [ "model.layers.28.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.28/input_layernorm/output_3.out4_55", "/model/layers.28/input_layernorm/output_0.out4_55" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_28", "type": "MladfMatMul", "in_args": [ "/model/layers.28/input_layernorm/output_0.out4_55" ], "const_args": [ "model.layers.28.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.28.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.28.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.28.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.28.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.28/input_layernorm/output_0.out4_55" ], "const_args": [ "model.layers.28.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.28.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.28.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.28.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.28.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "115", "57" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.28/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84", "past_key_values.28.key", "past_key_values.28.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28", "present.28.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "112", "56", "2", "0", "113", "57", "6", "0", "114", "56" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.28.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28" ], "const_args": [ "model.layers.28.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.28.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.28.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.28.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_56", "type": "FlatRMSAdd", "in_args": [ "/model/layers.28/input_layernorm/output_3.out4_55", "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85" ], "const_args": [ "model.layers.28.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.28/post_attention_layernorm/output_3.out4_56", "/model/layers.28/post_attention_layernorm/output_0.out4_56" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_28", "type": "FlatMLP", "in_args": [ "/model/layers.28/post_attention_layernorm/output_0.out4_56" ], "const_args": [ "model.layers.28.mlp.gate_proj.MatMulNBits.qweight", "model.layers.28.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.28.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.28.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.28.mlp.up_proj.MatMulNBits.qweight", "model.layers.28.mlp.up_proj.MatMulNBits.scales.f", "model.layers.28.mlp.up_proj.MatMulNBits.qzeros", "model.layers.28.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.28/mlp/Mul/output_0.out3_28" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.28.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.28/mlp/Mul/output_0.out3_28" ], "const_args": [ "model.layers.28.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.28.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.28.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.28.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_57", "type": "FlatRMSAdd", "in_args": [ "/model/layers.28/post_attention_layernorm/output_3.out4_56", "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86" ], "const_args": [ "model.layers.29.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.29/input_layernorm/output_3.out4_57", "/model/layers.29/input_layernorm/output_0.out4_57" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_29", "type": "MladfMatMul", "in_args": [ "/model/layers.29/input_layernorm/output_0.out4_57" ], "const_args": [ "model.layers.29.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.29.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.29.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.29.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.29.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.29/input_layernorm/output_0.out4_57" ], "const_args": [ "model.layers.29.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.29.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.29.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.29.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.29.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "119", "59" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.29/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87", "past_key_values.29.key", "past_key_values.29.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29", "present.29.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "116", "58", "2", "0", "117", "59", "6", "0", "118", "58" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.29.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29" ], "const_args": [ "model.layers.29.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.29.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.29.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.29.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_58", "type": "FlatRMSAdd", "in_args": [ "/model/layers.29/input_layernorm/output_3.out4_57", "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88" ], "const_args": [ "model.layers.29.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.29/post_attention_layernorm/output_3.out4_58", "/model/layers.29/post_attention_layernorm/output_0.out4_58" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_29", "type": "FlatMLP", "in_args": [ "/model/layers.29/post_attention_layernorm/output_0.out4_58" ], "const_args": [ "model.layers.29.mlp.gate_proj.MatMulNBits.qweight", "model.layers.29.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.29.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.29.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.29.mlp.up_proj.MatMulNBits.qweight", "model.layers.29.mlp.up_proj.MatMulNBits.scales.f", "model.layers.29.mlp.up_proj.MatMulNBits.qzeros", "model.layers.29.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.29/mlp/Mul/output_0.out3_29" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.29.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.29/mlp/Mul/output_0.out3_29" ], "const_args": [ "model.layers.29.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.29.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.29.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.29.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_59", "type": "FlatRMSAdd", "in_args": [ "/model/layers.29/post_attention_layernorm/output_3.out4_58", "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89" ], "const_args": [ "model.layers.30.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.30/input_layernorm/output_3.out4_59", "/model/layers.30/input_layernorm/output_0.out4_59" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_30", "type": "MladfMatMul", "in_args": [ "/model/layers.30/input_layernorm/output_0.out4_59" ], "const_args": [ "model.layers.30.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.30.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.30.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.30.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.30.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.30/input_layernorm/output_0.out4_59" ], "const_args": [ "model.layers.30.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.30.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.30.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.30.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.30.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "123", "61" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.30/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90", "past_key_values.30.key", "past_key_values.30.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30", "present.30.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "120", "60", "2", "0", "121", "61", "6", "0", "122", "60" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.30.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30" ], "const_args": [ "model.layers.30.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.30.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.30.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.30.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_60", "type": "FlatRMSAdd", "in_args": [ "/model/layers.30/input_layernorm/output_3.out4_59", "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91" ], "const_args": [ "model.layers.30.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.30/post_attention_layernorm/output_3.out4_60", "/model/layers.30/post_attention_layernorm/output_0.out4_60" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_30", "type": "FlatMLP", "in_args": [ "/model/layers.30/post_attention_layernorm/output_0.out4_60" ], "const_args": [ "model.layers.30.mlp.gate_proj.MatMulNBits.qweight", "model.layers.30.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.30.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.30.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.30.mlp.up_proj.MatMulNBits.qweight", "model.layers.30.mlp.up_proj.MatMulNBits.scales.f", "model.layers.30.mlp.up_proj.MatMulNBits.qzeros", "model.layers.30.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.30/mlp/Mul/output_0.out3_30" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.30.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.30/mlp/Mul/output_0.out3_30" ], "const_args": [ "model.layers.30.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.30.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.30.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.30.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_61", "type": "FlatRMSAdd", "in_args": [ "/model/layers.30/post_attention_layernorm/output_3.out4_60", "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92" ], "const_args": [ "model.layers.31.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.31/input_layernorm/output_3.out4_61", "/model/layers.31/input_layernorm/output_0.out4_61" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_31", "type": "MladfMatMul", "in_args": [ "/model/layers.31/input_layernorm/output_0.out4_61" ], "const_args": [ "model.layers.31.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.31.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.31.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.31.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "6144" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.31.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.31/input_layernorm/output_0.out4_61" ], "const_args": [ "model.layers.31.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.31.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.31.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.31.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.31.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "127", "63" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "192" ] } } }, { "name": "/model/layers.31/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93", "past_key_values.31.key", "past_key_values.31.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31", "present.31.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "32" ] }, "kv_num_heads": { "type": "int", "value": [ "32" ] }, "scale": { "type": "float", "value": [ "0.10206207633018494" ] }, "local_window_size": { "type": "int", "value": [ "262144" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "32", "32", "1", "4096", "96" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "124", "62", "2", "0", "125", "63", "6", "0", "126", "62" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "192", "6", "0", "0", "192" ] } } }, { "name": "layers.31.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31" ], "const_args": [ "model.layers.31.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.31.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.31.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.31.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_62", "type": "FlatRMSAdd", "in_args": [ "/model/layers.31/input_layernorm/output_3.out4_61", "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94" ], "const_args": [ "model.layers.31.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.31/post_attention_layernorm/output_3.out4_62", "/model/layers.31/post_attention_layernorm/output_0.out4_62" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_31", "type": "FlatMLP", "in_args": [ "/model/layers.31/post_attention_layernorm/output_0.out4_62" ], "const_args": [ "model.layers.31.mlp.gate_proj.MatMulNBits.qweight", "model.layers.31.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.31.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.31.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.31.mlp.up_proj.MatMulNBits.qweight", "model.layers.31.mlp.up_proj.MatMulNBits.scales.f", "model.layers.31.mlp.up_proj.MatMulNBits.qzeros", "model.layers.31.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.31/mlp/Mul/output_0.out3_31" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "3072", "8192" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.31.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.31/mlp/Mul/output_0.out3_31" ], "const_args": [ "model.layers.31.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.31.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.31.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.31.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8192" ] }, "N": { "type": "int", "value": [ "3072" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_63", "type": "FlatRMSAdd", "in_args": [ "/model/layers.31/post_attention_layernorm/output_3.out4_62", "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95" ], "const_args": [ "model.layers.32.final_norm_layernorm.weight.bf" ], "out_args": [ "/model/layers.32/final_norm_layernorm/output_0.dummy", "/model/layers.32/final_norm_layernorm/output_0.out4_63" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "3072" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "/lm_head/MatMulNBits", "type": "MladfMatMul", "in_args": [ "/model/layers.32/final_norm_layernorm/output_0.out4_63" ], "const_args": [ "lm_head.MatMulNBits.qweight.preformat", "lm_head.MatMulNBits.bias.preformat", "lm_head.MatMulNBits.scales.preformat", "lm_head.MatMulNBits.qzeros.preformat" ], "out_args": [ "logits.out5_4_96" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "3072" ] }, "N": { "type": "int", "value": [ "32064" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } } ], "fused_tensors": { "in": { "buffer_size": 18560, "xrt_arg_id": 0, "packed_tensors": [ "/model/layers.0/input_layernorm/output_0.out5_4_0", "attention_mask_const_uint", "/model/embed_tokens/Gather/output_0.out4_0" ] }, "out": { "buffer_size": 70272, "xrt_arg_id": 1, "packed_tensors": [ "/model/layers.32/final_norm_layernorm/output_0.dummy", "logits.out5_4_96" ] }, "scratch": { "buffer_size": 2287616, "xrt_arg_id": 2, "packed_tensors": [ "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0", "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0", "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1", "/model/layers.0/post_attention_layernorm/output_3.out4_0", "/model/layers.0/post_attention_layernorm/output_0.out4_0", "/model/layers.0/mlp/Mul/output_0.out3_0", "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2", "/model/layers.1/input_layernorm/output_3.out4_1", "/model/layers.1/input_layernorm/output_0.out4_1", "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3", "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1", "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4", "/model/layers.1/post_attention_layernorm/output_3.out4_2", "/model/layers.1/post_attention_layernorm/output_0.out4_2", "/model/layers.1/mlp/Mul/output_0.out3_1", "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5", "/model/layers.2/input_layernorm/output_3.out4_3", "/model/layers.2/input_layernorm/output_0.out4_3", "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6", "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2", "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7", "/model/layers.2/post_attention_layernorm/output_3.out4_4", "/model/layers.2/post_attention_layernorm/output_0.out4_4", "/model/layers.2/mlp/Mul/output_0.out3_2", "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8", "/model/layers.3/input_layernorm/output_3.out4_5", "/model/layers.3/input_layernorm/output_0.out4_5", "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9", "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3", "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10", "/model/layers.3/post_attention_layernorm/output_3.out4_6", "/model/layers.3/post_attention_layernorm/output_0.out4_6", "/model/layers.3/mlp/Mul/output_0.out3_3", "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11", "/model/layers.4/input_layernorm/output_3.out4_7", "/model/layers.4/input_layernorm/output_0.out4_7", "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12", "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4", "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13", "/model/layers.4/post_attention_layernorm/output_3.out4_8", "/model/layers.4/post_attention_layernorm/output_0.out4_8", "/model/layers.4/mlp/Mul/output_0.out3_4", "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14", "/model/layers.5/input_layernorm/output_3.out4_9", "/model/layers.5/input_layernorm/output_0.out4_9", "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15", "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5", "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16", "/model/layers.5/post_attention_layernorm/output_3.out4_10", "/model/layers.5/post_attention_layernorm/output_0.out4_10", "/model/layers.5/mlp/Mul/output_0.out3_5", "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17", "/model/layers.6/input_layernorm/output_3.out4_11", "/model/layers.6/input_layernorm/output_0.out4_11", "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18", "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6", "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19", "/model/layers.6/post_attention_layernorm/output_3.out4_12", "/model/layers.6/post_attention_layernorm/output_0.out4_12", "/model/layers.6/mlp/Mul/output_0.out3_6", "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20", "/model/layers.7/input_layernorm/output_3.out4_13", "/model/layers.7/input_layernorm/output_0.out4_13", "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21", "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7", "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22", "/model/layers.7/post_attention_layernorm/output_3.out4_14", "/model/layers.7/post_attention_layernorm/output_0.out4_14", "/model/layers.7/mlp/Mul/output_0.out3_7", "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23", "/model/layers.8/input_layernorm/output_3.out4_15", "/model/layers.8/input_layernorm/output_0.out4_15", "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24", "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8", "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25", "/model/layers.8/post_attention_layernorm/output_3.out4_16", "/model/layers.8/post_attention_layernorm/output_0.out4_16", "/model/layers.8/mlp/Mul/output_0.out3_8", "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26", "/model/layers.9/input_layernorm/output_3.out4_17", "/model/layers.9/input_layernorm/output_0.out4_17", "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27", "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9", "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28", "/model/layers.9/post_attention_layernorm/output_3.out4_18", "/model/layers.9/post_attention_layernorm/output_0.out4_18", "/model/layers.9/mlp/Mul/output_0.out3_9", "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29", "/model/layers.10/input_layernorm/output_3.out4_19", "/model/layers.10/input_layernorm/output_0.out4_19", "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30", "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10", "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31", "/model/layers.10/post_attention_layernorm/output_3.out4_20", "/model/layers.10/post_attention_layernorm/output_0.out4_20", "/model/layers.10/mlp/Mul/output_0.out3_10", "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32", "/model/layers.11/input_layernorm/output_3.out4_21", "/model/layers.11/input_layernorm/output_0.out4_21", "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33", "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11", "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34", "/model/layers.11/post_attention_layernorm/output_3.out4_22", "/model/layers.11/post_attention_layernorm/output_0.out4_22", "/model/layers.11/mlp/Mul/output_0.out3_11", "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35", "/model/layers.12/input_layernorm/output_3.out4_23", "/model/layers.12/input_layernorm/output_0.out4_23", "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36", "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12", "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37", "/model/layers.12/post_attention_layernorm/output_3.out4_24", "/model/layers.12/post_attention_layernorm/output_0.out4_24", "/model/layers.12/mlp/Mul/output_0.out3_12", "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38", "/model/layers.13/input_layernorm/output_3.out4_25", "/model/layers.13/input_layernorm/output_0.out4_25", "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39", "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13", "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40", "/model/layers.13/post_attention_layernorm/output_3.out4_26", "/model/layers.13/post_attention_layernorm/output_0.out4_26", "/model/layers.13/mlp/Mul/output_0.out3_13", "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41", "/model/layers.14/input_layernorm/output_3.out4_27", "/model/layers.14/input_layernorm/output_0.out4_27", "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42", "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14", "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43", "/model/layers.14/post_attention_layernorm/output_3.out4_28", "/model/layers.14/post_attention_layernorm/output_0.out4_28", "/model/layers.14/mlp/Mul/output_0.out3_14", "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44", "/model/layers.15/input_layernorm/output_3.out4_29", "/model/layers.15/input_layernorm/output_0.out4_29", "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45", "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15", "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46", "/model/layers.15/post_attention_layernorm/output_3.out4_30", "/model/layers.15/post_attention_layernorm/output_0.out4_30", "/model/layers.15/mlp/Mul/output_0.out3_15", "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47", "/model/layers.16/input_layernorm/output_3.out4_31", "/model/layers.16/input_layernorm/output_0.out4_31", "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48", "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16", "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49", "/model/layers.16/post_attention_layernorm/output_3.out4_32", "/model/layers.16/post_attention_layernorm/output_0.out4_32", "/model/layers.16/mlp/Mul/output_0.out3_16", "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50", "/model/layers.17/input_layernorm/output_3.out4_33", "/model/layers.17/input_layernorm/output_0.out4_33", "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51", "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17", "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52", "/model/layers.17/post_attention_layernorm/output_3.out4_34", "/model/layers.17/post_attention_layernorm/output_0.out4_34", "/model/layers.17/mlp/Mul/output_0.out3_17", "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53", "/model/layers.18/input_layernorm/output_3.out4_35", "/model/layers.18/input_layernorm/output_0.out4_35", "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54", "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18", "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55", "/model/layers.18/post_attention_layernorm/output_3.out4_36", "/model/layers.18/post_attention_layernorm/output_0.out4_36", "/model/layers.18/mlp/Mul/output_0.out3_18", "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56", "/model/layers.19/input_layernorm/output_3.out4_37", "/model/layers.19/input_layernorm/output_0.out4_37", "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57", "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19", "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58", "/model/layers.19/post_attention_layernorm/output_3.out4_38", "/model/layers.19/post_attention_layernorm/output_0.out4_38", "/model/layers.19/mlp/Mul/output_0.out3_19", "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59", "/model/layers.20/input_layernorm/output_3.out4_39", "/model/layers.20/input_layernorm/output_0.out4_39", "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60", "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20", "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61", "/model/layers.20/post_attention_layernorm/output_3.out4_40", "/model/layers.20/post_attention_layernorm/output_0.out4_40", "/model/layers.20/mlp/Mul/output_0.out3_20", "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62", "/model/layers.21/input_layernorm/output_3.out4_41", "/model/layers.21/input_layernorm/output_0.out4_41", "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63", "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21", "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64", "/model/layers.21/post_attention_layernorm/output_3.out4_42", "/model/layers.21/post_attention_layernorm/output_0.out4_42", "/model/layers.21/mlp/Mul/output_0.out3_21", "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65", "/model/layers.22/input_layernorm/output_3.out4_43", "/model/layers.22/input_layernorm/output_0.out4_43", "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66", "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22", "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67", "/model/layers.22/post_attention_layernorm/output_3.out4_44", "/model/layers.22/post_attention_layernorm/output_0.out4_44", "/model/layers.22/mlp/Mul/output_0.out3_22", "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68", "/model/layers.23/input_layernorm/output_3.out4_45", "/model/layers.23/input_layernorm/output_0.out4_45", "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69", "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23", "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70", "/model/layers.23/post_attention_layernorm/output_3.out4_46", "/model/layers.23/post_attention_layernorm/output_0.out4_46", "/model/layers.23/mlp/Mul/output_0.out3_23", "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71", "/model/layers.24/input_layernorm/output_3.out4_47", "/model/layers.24/input_layernorm/output_0.out4_47", "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72", "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24", "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73", "/model/layers.24/post_attention_layernorm/output_3.out4_48", "/model/layers.24/post_attention_layernorm/output_0.out4_48", "/model/layers.24/mlp/Mul/output_0.out3_24", "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74", "/model/layers.25/input_layernorm/output_3.out4_49", "/model/layers.25/input_layernorm/output_0.out4_49", "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75", "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25", "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76", "/model/layers.25/post_attention_layernorm/output_3.out4_50", "/model/layers.25/post_attention_layernorm/output_0.out4_50", "/model/layers.25/mlp/Mul/output_0.out3_25", "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77", "/model/layers.26/input_layernorm/output_3.out4_51", "/model/layers.26/input_layernorm/output_0.out4_51", "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78", "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26", "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79", "/model/layers.26/post_attention_layernorm/output_3.out4_52", "/model/layers.26/post_attention_layernorm/output_0.out4_52", "/model/layers.26/mlp/Mul/output_0.out3_26", "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80", "/model/layers.27/input_layernorm/output_3.out4_53", "/model/layers.27/input_layernorm/output_0.out4_53", "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81", "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27", "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82", "/model/layers.27/post_attention_layernorm/output_3.out4_54", "/model/layers.27/post_attention_layernorm/output_0.out4_54", "/model/layers.27/mlp/Mul/output_0.out3_27", "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83", "/model/layers.28/input_layernorm/output_3.out4_55", "/model/layers.28/input_layernorm/output_0.out4_55", "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84", "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28", "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85", "/model/layers.28/post_attention_layernorm/output_3.out4_56", "/model/layers.28/post_attention_layernorm/output_0.out4_56", "/model/layers.28/mlp/Mul/output_0.out3_28", "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86", "/model/layers.29/input_layernorm/output_3.out4_57", "/model/layers.29/input_layernorm/output_0.out4_57", "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87", "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29", "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88", "/model/layers.29/post_attention_layernorm/output_3.out4_58", "/model/layers.29/post_attention_layernorm/output_0.out4_58", "/model/layers.29/mlp/Mul/output_0.out3_29", "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89", "/model/layers.30/input_layernorm/output_3.out4_59", "/model/layers.30/input_layernorm/output_0.out4_59", "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90", "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30", "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91", "/model/layers.30/post_attention_layernorm/output_3.out4_60", "/model/layers.30/post_attention_layernorm/output_0.out4_60", "/model/layers.30/mlp/Mul/output_0.out3_30", "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92", "/model/layers.31/input_layernorm/output_3.out4_61", "/model/layers.31/input_layernorm/output_0.out4_61", "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93", "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31", "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94", "/model/layers.31/post_attention_layernorm/output_3.out4_62", "/model/layers.31/post_attention_layernorm/output_0.out4_62", "/model/layers.31/mlp/Mul/output_0.out3_31", "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95", "/model/layers.32/final_norm_layernorm/output_0.out4_63" ] }, "const": { "buffer_size": 3060771584, "xrt_arg_id": 3, "packed_tensors": [ "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.0.post_attention_layernorm.weight.bf", "model.layers.0.mlp.gate_proj.MatMulNBits.qweight", "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.0.mlp.up_proj.MatMulNBits.qweight", "model.layers.0.mlp.up_proj.MatMulNBits.scales.f", "model.layers.0.mlp.up_proj.MatMulNBits.qzeros", "model.layers.0.mlp.up_proj.MatMulNBits.bias.f", "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.1.input_layernorm.weight.bf", "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.1.post_attention_layernorm.weight.bf", "model.layers.1.mlp.gate_proj.MatMulNBits.qweight", "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.1.mlp.up_proj.MatMulNBits.qweight", "model.layers.1.mlp.up_proj.MatMulNBits.scales.f", "model.layers.1.mlp.up_proj.MatMulNBits.qzeros", "model.layers.1.mlp.up_proj.MatMulNBits.bias.f", "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.2.input_layernorm.weight.bf", "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.2.post_attention_layernorm.weight.bf", "model.layers.2.mlp.gate_proj.MatMulNBits.qweight", "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.2.mlp.up_proj.MatMulNBits.qweight", "model.layers.2.mlp.up_proj.MatMulNBits.scales.f", "model.layers.2.mlp.up_proj.MatMulNBits.qzeros", "model.layers.2.mlp.up_proj.MatMulNBits.bias.f", "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.3.input_layernorm.weight.bf", "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.3.post_attention_layernorm.weight.bf", "model.layers.3.mlp.gate_proj.MatMulNBits.qweight", "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.3.mlp.up_proj.MatMulNBits.qweight", "model.layers.3.mlp.up_proj.MatMulNBits.scales.f", "model.layers.3.mlp.up_proj.MatMulNBits.qzeros", "model.layers.3.mlp.up_proj.MatMulNBits.bias.f", "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.4.input_layernorm.weight.bf", "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.4.post_attention_layernorm.weight.bf", "model.layers.4.mlp.gate_proj.MatMulNBits.qweight", "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.4.mlp.up_proj.MatMulNBits.qweight", "model.layers.4.mlp.up_proj.MatMulNBits.scales.f", "model.layers.4.mlp.up_proj.MatMulNBits.qzeros", "model.layers.4.mlp.up_proj.MatMulNBits.bias.f", "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.5.input_layernorm.weight.bf", "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.5.post_attention_layernorm.weight.bf", "model.layers.5.mlp.gate_proj.MatMulNBits.qweight", "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.5.mlp.up_proj.MatMulNBits.qweight", "model.layers.5.mlp.up_proj.MatMulNBits.scales.f", "model.layers.5.mlp.up_proj.MatMulNBits.qzeros", "model.layers.5.mlp.up_proj.MatMulNBits.bias.f", "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.6.input_layernorm.weight.bf", "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.6.post_attention_layernorm.weight.bf", "model.layers.6.mlp.gate_proj.MatMulNBits.qweight", "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.6.mlp.up_proj.MatMulNBits.qweight", "model.layers.6.mlp.up_proj.MatMulNBits.scales.f", "model.layers.6.mlp.up_proj.MatMulNBits.qzeros", "model.layers.6.mlp.up_proj.MatMulNBits.bias.f", "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.7.input_layernorm.weight.bf", "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.7.post_attention_layernorm.weight.bf", "model.layers.7.mlp.gate_proj.MatMulNBits.qweight", "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.7.mlp.up_proj.MatMulNBits.qweight", "model.layers.7.mlp.up_proj.MatMulNBits.scales.f", "model.layers.7.mlp.up_proj.MatMulNBits.qzeros", "model.layers.7.mlp.up_proj.MatMulNBits.bias.f", "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.8.input_layernorm.weight.bf", "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.8.post_attention_layernorm.weight.bf", "model.layers.8.mlp.gate_proj.MatMulNBits.qweight", "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.8.mlp.up_proj.MatMulNBits.qweight", "model.layers.8.mlp.up_proj.MatMulNBits.scales.f", "model.layers.8.mlp.up_proj.MatMulNBits.qzeros", "model.layers.8.mlp.up_proj.MatMulNBits.bias.f", "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.9.input_layernorm.weight.bf", "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.9.post_attention_layernorm.weight.bf", "model.layers.9.mlp.gate_proj.MatMulNBits.qweight", "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.9.mlp.up_proj.MatMulNBits.qweight", "model.layers.9.mlp.up_proj.MatMulNBits.scales.f", "model.layers.9.mlp.up_proj.MatMulNBits.qzeros", "model.layers.9.mlp.up_proj.MatMulNBits.bias.f", "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.10.input_layernorm.weight.bf", "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.10.post_attention_layernorm.weight.bf", "model.layers.10.mlp.gate_proj.MatMulNBits.qweight", "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.10.mlp.up_proj.MatMulNBits.qweight", "model.layers.10.mlp.up_proj.MatMulNBits.scales.f", "model.layers.10.mlp.up_proj.MatMulNBits.qzeros", "model.layers.10.mlp.up_proj.MatMulNBits.bias.f", "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.11.input_layernorm.weight.bf", "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.11.post_attention_layernorm.weight.bf", "model.layers.11.mlp.gate_proj.MatMulNBits.qweight", "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.11.mlp.up_proj.MatMulNBits.qweight", "model.layers.11.mlp.up_proj.MatMulNBits.scales.f", "model.layers.11.mlp.up_proj.MatMulNBits.qzeros", "model.layers.11.mlp.up_proj.MatMulNBits.bias.f", "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.12.input_layernorm.weight.bf", "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.12.post_attention_layernorm.weight.bf", "model.layers.12.mlp.gate_proj.MatMulNBits.qweight", "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.12.mlp.up_proj.MatMulNBits.qweight", "model.layers.12.mlp.up_proj.MatMulNBits.scales.f", "model.layers.12.mlp.up_proj.MatMulNBits.qzeros", "model.layers.12.mlp.up_proj.MatMulNBits.bias.f", "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.13.input_layernorm.weight.bf", "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.13.post_attention_layernorm.weight.bf", "model.layers.13.mlp.gate_proj.MatMulNBits.qweight", "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.13.mlp.up_proj.MatMulNBits.qweight", "model.layers.13.mlp.up_proj.MatMulNBits.scales.f", "model.layers.13.mlp.up_proj.MatMulNBits.qzeros", "model.layers.13.mlp.up_proj.MatMulNBits.bias.f", "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.14.input_layernorm.weight.bf", "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.14.post_attention_layernorm.weight.bf", "model.layers.14.mlp.gate_proj.MatMulNBits.qweight", "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.14.mlp.up_proj.MatMulNBits.qweight", "model.layers.14.mlp.up_proj.MatMulNBits.scales.f", "model.layers.14.mlp.up_proj.MatMulNBits.qzeros", "model.layers.14.mlp.up_proj.MatMulNBits.bias.f", "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.15.input_layernorm.weight.bf", "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.15.post_attention_layernorm.weight.bf", "model.layers.15.mlp.gate_proj.MatMulNBits.qweight", "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.15.mlp.up_proj.MatMulNBits.qweight", "model.layers.15.mlp.up_proj.MatMulNBits.scales.f", "model.layers.15.mlp.up_proj.MatMulNBits.qzeros", "model.layers.15.mlp.up_proj.MatMulNBits.bias.f", "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.16.input_layernorm.weight.bf", "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.16.post_attention_layernorm.weight.bf", "model.layers.16.mlp.gate_proj.MatMulNBits.qweight", "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.16.mlp.up_proj.MatMulNBits.qweight", "model.layers.16.mlp.up_proj.MatMulNBits.scales.f", "model.layers.16.mlp.up_proj.MatMulNBits.qzeros", "model.layers.16.mlp.up_proj.MatMulNBits.bias.f", "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.17.input_layernorm.weight.bf", "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.17.post_attention_layernorm.weight.bf", "model.layers.17.mlp.gate_proj.MatMulNBits.qweight", "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.17.mlp.up_proj.MatMulNBits.qweight", "model.layers.17.mlp.up_proj.MatMulNBits.scales.f", "model.layers.17.mlp.up_proj.MatMulNBits.qzeros", "model.layers.17.mlp.up_proj.MatMulNBits.bias.f", "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.18.input_layernorm.weight.bf", "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.18.post_attention_layernorm.weight.bf", "model.layers.18.mlp.gate_proj.MatMulNBits.qweight", "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.18.mlp.up_proj.MatMulNBits.qweight", "model.layers.18.mlp.up_proj.MatMulNBits.scales.f", "model.layers.18.mlp.up_proj.MatMulNBits.qzeros", "model.layers.18.mlp.up_proj.MatMulNBits.bias.f", "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.19.input_layernorm.weight.bf", "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.19.post_attention_layernorm.weight.bf", "model.layers.19.mlp.gate_proj.MatMulNBits.qweight", "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.19.mlp.up_proj.MatMulNBits.qweight", "model.layers.19.mlp.up_proj.MatMulNBits.scales.f", "model.layers.19.mlp.up_proj.MatMulNBits.qzeros", "model.layers.19.mlp.up_proj.MatMulNBits.bias.f", "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.20.input_layernorm.weight.bf", "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.20.post_attention_layernorm.weight.bf", "model.layers.20.mlp.gate_proj.MatMulNBits.qweight", "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.20.mlp.up_proj.MatMulNBits.qweight", "model.layers.20.mlp.up_proj.MatMulNBits.scales.f", "model.layers.20.mlp.up_proj.MatMulNBits.qzeros", "model.layers.20.mlp.up_proj.MatMulNBits.bias.f", "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.21.input_layernorm.weight.bf", "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.21.post_attention_layernorm.weight.bf", "model.layers.21.mlp.gate_proj.MatMulNBits.qweight", "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.21.mlp.up_proj.MatMulNBits.qweight", "model.layers.21.mlp.up_proj.MatMulNBits.scales.f", "model.layers.21.mlp.up_proj.MatMulNBits.qzeros", "model.layers.21.mlp.up_proj.MatMulNBits.bias.f", "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.22.input_layernorm.weight.bf", "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.22.post_attention_layernorm.weight.bf", "model.layers.22.mlp.gate_proj.MatMulNBits.qweight", "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.22.mlp.up_proj.MatMulNBits.qweight", "model.layers.22.mlp.up_proj.MatMulNBits.scales.f", "model.layers.22.mlp.up_proj.MatMulNBits.qzeros", "model.layers.22.mlp.up_proj.MatMulNBits.bias.f", "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.23.input_layernorm.weight.bf", "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.23.post_attention_layernorm.weight.bf", "model.layers.23.mlp.gate_proj.MatMulNBits.qweight", "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.23.mlp.up_proj.MatMulNBits.qweight", "model.layers.23.mlp.up_proj.MatMulNBits.scales.f", "model.layers.23.mlp.up_proj.MatMulNBits.qzeros", "model.layers.23.mlp.up_proj.MatMulNBits.bias.f", "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.24.input_layernorm.weight.bf", "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.24.post_attention_layernorm.weight.bf", "model.layers.24.mlp.gate_proj.MatMulNBits.qweight", "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.24.mlp.up_proj.MatMulNBits.qweight", "model.layers.24.mlp.up_proj.MatMulNBits.scales.f", "model.layers.24.mlp.up_proj.MatMulNBits.qzeros", "model.layers.24.mlp.up_proj.MatMulNBits.bias.f", "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.25.input_layernorm.weight.bf", "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.25.post_attention_layernorm.weight.bf", "model.layers.25.mlp.gate_proj.MatMulNBits.qweight", "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.25.mlp.up_proj.MatMulNBits.qweight", "model.layers.25.mlp.up_proj.MatMulNBits.scales.f", "model.layers.25.mlp.up_proj.MatMulNBits.qzeros", "model.layers.25.mlp.up_proj.MatMulNBits.bias.f", "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.26.input_layernorm.weight.bf", "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.26.post_attention_layernorm.weight.bf", "model.layers.26.mlp.gate_proj.MatMulNBits.qweight", "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.26.mlp.up_proj.MatMulNBits.qweight", "model.layers.26.mlp.up_proj.MatMulNBits.scales.f", "model.layers.26.mlp.up_proj.MatMulNBits.qzeros", "model.layers.26.mlp.up_proj.MatMulNBits.bias.f", "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.27.input_layernorm.weight.bf", "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.27.post_attention_layernorm.weight.bf", "model.layers.27.mlp.gate_proj.MatMulNBits.qweight", "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.27.mlp.up_proj.MatMulNBits.qweight", "model.layers.27.mlp.up_proj.MatMulNBits.scales.f", "model.layers.27.mlp.up_proj.MatMulNBits.qzeros", "model.layers.27.mlp.up_proj.MatMulNBits.bias.f", "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.28.input_layernorm.weight.bf", "model.layers.28.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.28.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.28.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.28.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.28.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.28.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.28.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.28.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.28.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.28.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.28.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.28.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.28.post_attention_layernorm.weight.bf", "model.layers.28.mlp.gate_proj.MatMulNBits.qweight", "model.layers.28.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.28.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.28.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.28.mlp.up_proj.MatMulNBits.qweight", "model.layers.28.mlp.up_proj.MatMulNBits.scales.f", "model.layers.28.mlp.up_proj.MatMulNBits.qzeros", "model.layers.28.mlp.up_proj.MatMulNBits.bias.f", "model.layers.28.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.28.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.28.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.28.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.29.input_layernorm.weight.bf", "model.layers.29.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.29.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.29.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.29.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.29.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.29.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.29.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.29.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.29.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.29.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.29.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.29.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.29.post_attention_layernorm.weight.bf", "model.layers.29.mlp.gate_proj.MatMulNBits.qweight", "model.layers.29.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.29.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.29.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.29.mlp.up_proj.MatMulNBits.qweight", "model.layers.29.mlp.up_proj.MatMulNBits.scales.f", "model.layers.29.mlp.up_proj.MatMulNBits.qzeros", "model.layers.29.mlp.up_proj.MatMulNBits.bias.f", "model.layers.29.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.29.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.29.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.29.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.30.input_layernorm.weight.bf", "model.layers.30.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.30.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.30.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.30.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.30.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.30.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.30.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.30.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.30.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.30.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.30.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.30.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.30.post_attention_layernorm.weight.bf", "model.layers.30.mlp.gate_proj.MatMulNBits.qweight", "model.layers.30.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.30.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.30.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.30.mlp.up_proj.MatMulNBits.qweight", "model.layers.30.mlp.up_proj.MatMulNBits.scales.f", "model.layers.30.mlp.up_proj.MatMulNBits.qzeros", "model.layers.30.mlp.up_proj.MatMulNBits.bias.f", "model.layers.30.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.30.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.30.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.30.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.31.input_layernorm.weight.bf", "model.layers.31.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.31.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.31.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.31.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.31.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.31.attn.v_proj.MatMulNBits.bias.preformat", "model.layers.31.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.31.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.31.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.31.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.31.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.31.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.31.post_attention_layernorm.weight.bf", "model.layers.31.mlp.gate_proj.MatMulNBits.qweight", "model.layers.31.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.31.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.31.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.31.mlp.up_proj.MatMulNBits.qweight", "model.layers.31.mlp.up_proj.MatMulNBits.scales.f", "model.layers.31.mlp.up_proj.MatMulNBits.qzeros", "model.layers.31.mlp.up_proj.MatMulNBits.bias.f", "model.layers.31.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.31.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.31.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.31.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.32.final_norm_layernorm.weight.bf", "lm_head.MatMulNBits.qweight.preformat", "lm_head.MatMulNBits.bias.preformat", "lm_head.MatMulNBits.scales.preformat", "lm_head.MatMulNBits.qzeros.preformat" ] }, "super_instr": { "buffer_size": 0, "xrt_arg_id": 4, "packed_tensors": [] }, "ext_buf_0": { "buffer_size": 1610612736, "xrt_arg_id": 5, "packed_tensors": [ "past_key_values.0.key", "past_key_values.0.value", "present.0.key", "present.0.value", "past_key_values.1.key", "past_key_values.1.value", "present.1.key", "present.1.value", "past_key_values.2.key", "past_key_values.2.value", "present.2.key", "present.2.value", "past_key_values.3.key", "past_key_values.3.value", "present.3.key", "present.3.value", "past_key_values.4.key", "past_key_values.4.value", "present.4.key", "present.4.value", "past_key_values.5.key", "past_key_values.5.value", "present.5.key", "present.5.value", "past_key_values.6.key", "past_key_values.6.value", "present.6.key", "present.6.value", "past_key_values.7.key", "past_key_values.7.value", "present.7.key", "present.7.value", "past_key_values.8.key", "past_key_values.8.value", "present.8.key", "present.8.value", "past_key_values.9.key", "past_key_values.9.value", "present.9.key", "present.9.value", "past_key_values.10.key", "past_key_values.10.value", "present.10.key", "present.10.value", "past_key_values.11.key", "past_key_values.11.value", "present.11.key", "present.11.value", "past_key_values.12.key", "past_key_values.12.value", "present.12.key", "present.12.value", "past_key_values.13.key", "past_key_values.13.value", "present.13.key", "present.13.value", "past_key_values.14.key", "past_key_values.14.value", "present.14.key", "present.14.value", "past_key_values.15.key", "past_key_values.15.value", "present.15.key", "present.15.value", "past_key_values.16.key", "past_key_values.16.value", "present.16.key", "present.16.value", "past_key_values.17.key", "past_key_values.17.value", "present.17.key", "present.17.value", "past_key_values.18.key", "past_key_values.18.value", "present.18.key", "present.18.value", "past_key_values.19.key", "past_key_values.19.value", "present.19.key", "present.19.value", "past_key_values.20.key", "past_key_values.20.value", "present.20.key", "present.20.value", "past_key_values.21.key", "past_key_values.21.value", "present.21.key", "present.21.value", "past_key_values.22.key", "past_key_values.22.value", "present.22.key", "present.22.value", "past_key_values.23.key", "past_key_values.23.value", "present.23.key", "present.23.value", "past_key_values.24.key", "past_key_values.24.value", "present.24.key", "present.24.value", "past_key_values.25.key", "past_key_values.25.value", "present.25.key", "present.25.value", "past_key_values.26.key", "past_key_values.26.value", "present.26.key", "present.26.value", "past_key_values.27.key", "past_key_values.27.value", "present.27.key", "present.27.value", "past_key_values.28.key", "past_key_values.28.value", "present.28.key", "present.28.value", "past_key_values.29.key", "past_key_values.29.value", "present.29.key", "present.29.value", "past_key_values.30.key", "past_key_values.30.value", "present.30.key", "present.30.value", "past_key_values.31.key", "past_key_values.31.value", "present.31.key", "present.31.value" ] }, "ext_buf_1": { "buffer_size": 25952256, "xrt_arg_id": 6, "packed_tensors": [ "sin_cos_cache_token" ] } }, "tensor_map": { "/model/layers.0/input_layernorm/output_0.out5_4_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 6144 }, "attention_mask_const_uint": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "uint32", "shape": [ 1 ], "size_in_bytes": 4, "op_tensor_size": 4, "offset": 18556 }, "/model/embed_tokens/Gather/output_0.out4_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 12292 }, "/model/layers.32/final_norm_layernorm/output_0.dummy": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "float16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 0 }, "logits.out5_4_96": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 32064 ], "size_in_bytes": 64128, "op_tensor_size": 64128, "offset": 6144 }, "/model/layers.0/attn/qk_proj/MatMulNBits/output_0.out5_4_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 0 }, "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 12288 }, "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 18432 }, "/model/layers.0/post_attention_layernorm/output_3.out4_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 24576 }, "/model/layers.0/post_attention_layernorm/output_0.out4_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 30720 }, "/model/layers.0/mlp/Mul/output_0.out3_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 36864 }, "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 53248 }, "/model/layers.1/input_layernorm/output_3.out4_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 59392 }, "/model/layers.1/input_layernorm/output_0.out4_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 65536 }, "/model/layers.1/attn/qk_proj/MatMulNBits/output_0.out5_4_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 71680 }, "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 83968 }, "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 90112 }, "/model/layers.1/post_attention_layernorm/output_3.out4_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 96256 }, "/model/layers.1/post_attention_layernorm/output_0.out4_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 102400 }, "/model/layers.1/mlp/Mul/output_0.out3_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 108544 }, "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 124928 }, "/model/layers.2/input_layernorm/output_3.out4_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 131072 }, "/model/layers.2/input_layernorm/output_0.out4_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 137216 }, "/model/layers.2/attn/qk_proj/MatMulNBits/output_0.out5_4_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 143360 }, "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 155648 }, "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 161792 }, "/model/layers.2/post_attention_layernorm/output_3.out4_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 167936 }, "/model/layers.2/post_attention_layernorm/output_0.out4_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 174080 }, "/model/layers.2/mlp/Mul/output_0.out3_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 180224 }, "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 196608 }, "/model/layers.3/input_layernorm/output_3.out4_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 202752 }, "/model/layers.3/input_layernorm/output_0.out4_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 208896 }, "/model/layers.3/attn/qk_proj/MatMulNBits/output_0.out5_4_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 215040 }, "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 227328 }, "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 233472 }, "/model/layers.3/post_attention_layernorm/output_3.out4_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 239616 }, "/model/layers.3/post_attention_layernorm/output_0.out4_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 245760 }, "/model/layers.3/mlp/Mul/output_0.out3_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 251904 }, "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 268288 }, "/model/layers.4/input_layernorm/output_3.out4_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 274432 }, "/model/layers.4/input_layernorm/output_0.out4_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 280576 }, "/model/layers.4/attn/qk_proj/MatMulNBits/output_0.out5_4_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 286720 }, "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 299008 }, "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 305152 }, "/model/layers.4/post_attention_layernorm/output_3.out4_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 311296 }, "/model/layers.4/post_attention_layernorm/output_0.out4_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 317440 }, "/model/layers.4/mlp/Mul/output_0.out3_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 323584 }, "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 339968 }, "/model/layers.5/input_layernorm/output_3.out4_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 346112 }, "/model/layers.5/input_layernorm/output_0.out4_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 352256 }, "/model/layers.5/attn/qk_proj/MatMulNBits/output_0.out5_4_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 358400 }, "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 370688 }, "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 376832 }, "/model/layers.5/post_attention_layernorm/output_3.out4_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 382976 }, "/model/layers.5/post_attention_layernorm/output_0.out4_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 389120 }, "/model/layers.5/mlp/Mul/output_0.out3_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 395264 }, "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 411648 }, "/model/layers.6/input_layernorm/output_3.out4_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 417792 }, "/model/layers.6/input_layernorm/output_0.out4_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 423936 }, "/model/layers.6/attn/qk_proj/MatMulNBits/output_0.out5_4_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 430080 }, "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 442368 }, "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 448512 }, "/model/layers.6/post_attention_layernorm/output_3.out4_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 454656 }, "/model/layers.6/post_attention_layernorm/output_0.out4_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 460800 }, "/model/layers.6/mlp/Mul/output_0.out3_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 466944 }, "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 483328 }, "/model/layers.7/input_layernorm/output_3.out4_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 489472 }, "/model/layers.7/input_layernorm/output_0.out4_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 495616 }, "/model/layers.7/attn/qk_proj/MatMulNBits/output_0.out5_4_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 501760 }, "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 514048 }, "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 520192 }, "/model/layers.7/post_attention_layernorm/output_3.out4_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 526336 }, "/model/layers.7/post_attention_layernorm/output_0.out4_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 532480 }, "/model/layers.7/mlp/Mul/output_0.out3_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 538624 }, "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 555008 }, "/model/layers.8/input_layernorm/output_3.out4_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 561152 }, "/model/layers.8/input_layernorm/output_0.out4_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 567296 }, "/model/layers.8/attn/qk_proj/MatMulNBits/output_0.out5_4_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 573440 }, "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 585728 }, "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 591872 }, "/model/layers.8/post_attention_layernorm/output_3.out4_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 598016 }, "/model/layers.8/post_attention_layernorm/output_0.out4_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 604160 }, "/model/layers.8/mlp/Mul/output_0.out3_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 610304 }, "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 626688 }, "/model/layers.9/input_layernorm/output_3.out4_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 632832 }, "/model/layers.9/input_layernorm/output_0.out4_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 638976 }, "/model/layers.9/attn/qk_proj/MatMulNBits/output_0.out5_4_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 645120 }, "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 657408 }, "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 663552 }, "/model/layers.9/post_attention_layernorm/output_3.out4_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 669696 }, "/model/layers.9/post_attention_layernorm/output_0.out4_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 675840 }, "/model/layers.9/mlp/Mul/output_0.out3_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 681984 }, "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 698368 }, "/model/layers.10/input_layernorm/output_3.out4_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 704512 }, "/model/layers.10/input_layernorm/output_0.out4_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 710656 }, "/model/layers.10/attn/qk_proj/MatMulNBits/output_0.out5_4_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 716800 }, "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 729088 }, "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 735232 }, "/model/layers.10/post_attention_layernorm/output_3.out4_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 741376 }, "/model/layers.10/post_attention_layernorm/output_0.out4_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 747520 }, "/model/layers.10/mlp/Mul/output_0.out3_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 753664 }, "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 770048 }, "/model/layers.11/input_layernorm/output_3.out4_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 776192 }, "/model/layers.11/input_layernorm/output_0.out4_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 782336 }, "/model/layers.11/attn/qk_proj/MatMulNBits/output_0.out5_4_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 788480 }, "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 800768 }, "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 806912 }, "/model/layers.11/post_attention_layernorm/output_3.out4_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 813056 }, "/model/layers.11/post_attention_layernorm/output_0.out4_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 819200 }, "/model/layers.11/mlp/Mul/output_0.out3_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 825344 }, "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 841728 }, "/model/layers.12/input_layernorm/output_3.out4_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 847872 }, "/model/layers.12/input_layernorm/output_0.out4_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 854016 }, "/model/layers.12/attn/qk_proj/MatMulNBits/output_0.out5_4_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 860160 }, "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 872448 }, "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 878592 }, "/model/layers.12/post_attention_layernorm/output_3.out4_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 884736 }, "/model/layers.12/post_attention_layernorm/output_0.out4_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 890880 }, "/model/layers.12/mlp/Mul/output_0.out3_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 897024 }, "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 913408 }, "/model/layers.13/input_layernorm/output_3.out4_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 919552 }, "/model/layers.13/input_layernorm/output_0.out4_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 925696 }, "/model/layers.13/attn/qk_proj/MatMulNBits/output_0.out5_4_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 931840 }, "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 944128 }, "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 950272 }, "/model/layers.13/post_attention_layernorm/output_3.out4_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 956416 }, "/model/layers.13/post_attention_layernorm/output_0.out4_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 962560 }, "/model/layers.13/mlp/Mul/output_0.out3_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 968704 }, "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 985088 }, "/model/layers.14/input_layernorm/output_3.out4_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 991232 }, "/model/layers.14/input_layernorm/output_0.out4_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 997376 }, "/model/layers.14/attn/qk_proj/MatMulNBits/output_0.out5_4_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1003520 }, "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1015808 }, "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1021952 }, "/model/layers.14/post_attention_layernorm/output_3.out4_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1028096 }, "/model/layers.14/post_attention_layernorm/output_0.out4_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1034240 }, "/model/layers.14/mlp/Mul/output_0.out3_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1040384 }, "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1056768 }, "/model/layers.15/input_layernorm/output_3.out4_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1062912 }, "/model/layers.15/input_layernorm/output_0.out4_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1069056 }, "/model/layers.15/attn/qk_proj/MatMulNBits/output_0.out5_4_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1075200 }, "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1087488 }, "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1093632 }, "/model/layers.15/post_attention_layernorm/output_3.out4_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1099776 }, "/model/layers.15/post_attention_layernorm/output_0.out4_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1105920 }, "/model/layers.15/mlp/Mul/output_0.out3_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1112064 }, "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1128448 }, "/model/layers.16/input_layernorm/output_3.out4_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1134592 }, "/model/layers.16/input_layernorm/output_0.out4_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1140736 }, "/model/layers.16/attn/qk_proj/MatMulNBits/output_0.out5_4_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1146880 }, "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1159168 }, "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1165312 }, "/model/layers.16/post_attention_layernorm/output_3.out4_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1171456 }, "/model/layers.16/post_attention_layernorm/output_0.out4_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1177600 }, "/model/layers.16/mlp/Mul/output_0.out3_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1183744 }, "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1200128 }, "/model/layers.17/input_layernorm/output_3.out4_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1206272 }, "/model/layers.17/input_layernorm/output_0.out4_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1212416 }, "/model/layers.17/attn/qk_proj/MatMulNBits/output_0.out5_4_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1218560 }, "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1230848 }, "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1236992 }, "/model/layers.17/post_attention_layernorm/output_3.out4_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1243136 }, "/model/layers.17/post_attention_layernorm/output_0.out4_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1249280 }, "/model/layers.17/mlp/Mul/output_0.out3_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1255424 }, "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1271808 }, "/model/layers.18/input_layernorm/output_3.out4_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1277952 }, "/model/layers.18/input_layernorm/output_0.out4_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1284096 }, "/model/layers.18/attn/qk_proj/MatMulNBits/output_0.out5_4_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1290240 }, "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1302528 }, "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1308672 }, "/model/layers.18/post_attention_layernorm/output_3.out4_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1314816 }, "/model/layers.18/post_attention_layernorm/output_0.out4_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1320960 }, "/model/layers.18/mlp/Mul/output_0.out3_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1327104 }, "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1343488 }, "/model/layers.19/input_layernorm/output_3.out4_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1349632 }, "/model/layers.19/input_layernorm/output_0.out4_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1355776 }, "/model/layers.19/attn/qk_proj/MatMulNBits/output_0.out5_4_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1361920 }, "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1374208 }, "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1380352 }, "/model/layers.19/post_attention_layernorm/output_3.out4_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1386496 }, "/model/layers.19/post_attention_layernorm/output_0.out4_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1392640 }, "/model/layers.19/mlp/Mul/output_0.out3_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1398784 }, "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1415168 }, "/model/layers.20/input_layernorm/output_3.out4_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1421312 }, "/model/layers.20/input_layernorm/output_0.out4_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1427456 }, "/model/layers.20/attn/qk_proj/MatMulNBits/output_0.out5_4_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1433600 }, "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1445888 }, "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1452032 }, "/model/layers.20/post_attention_layernorm/output_3.out4_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1458176 }, "/model/layers.20/post_attention_layernorm/output_0.out4_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1464320 }, "/model/layers.20/mlp/Mul/output_0.out3_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1470464 }, "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1486848 }, "/model/layers.21/input_layernorm/output_3.out4_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1492992 }, "/model/layers.21/input_layernorm/output_0.out4_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1499136 }, "/model/layers.21/attn/qk_proj/MatMulNBits/output_0.out5_4_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1505280 }, "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1517568 }, "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1523712 }, "/model/layers.21/post_attention_layernorm/output_3.out4_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1529856 }, "/model/layers.21/post_attention_layernorm/output_0.out4_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1536000 }, "/model/layers.21/mlp/Mul/output_0.out3_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1542144 }, "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1558528 }, "/model/layers.22/input_layernorm/output_3.out4_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1564672 }, "/model/layers.22/input_layernorm/output_0.out4_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1570816 }, "/model/layers.22/attn/qk_proj/MatMulNBits/output_0.out5_4_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1576960 }, "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1589248 }, "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1595392 }, "/model/layers.22/post_attention_layernorm/output_3.out4_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1601536 }, "/model/layers.22/post_attention_layernorm/output_0.out4_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1607680 }, "/model/layers.22/mlp/Mul/output_0.out3_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1613824 }, "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1630208 }, "/model/layers.23/input_layernorm/output_3.out4_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1636352 }, "/model/layers.23/input_layernorm/output_0.out4_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1642496 }, "/model/layers.23/attn/qk_proj/MatMulNBits/output_0.out5_4_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1648640 }, "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1660928 }, "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1667072 }, "/model/layers.23/post_attention_layernorm/output_3.out4_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1673216 }, "/model/layers.23/post_attention_layernorm/output_0.out4_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1679360 }, "/model/layers.23/mlp/Mul/output_0.out3_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1685504 }, "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1701888 }, "/model/layers.24/input_layernorm/output_3.out4_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1708032 }, "/model/layers.24/input_layernorm/output_0.out4_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1714176 }, "/model/layers.24/attn/qk_proj/MatMulNBits/output_0.out5_4_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1720320 }, "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1732608 }, "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1738752 }, "/model/layers.24/post_attention_layernorm/output_3.out4_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1744896 }, "/model/layers.24/post_attention_layernorm/output_0.out4_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1751040 }, "/model/layers.24/mlp/Mul/output_0.out3_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1757184 }, "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1773568 }, "/model/layers.25/input_layernorm/output_3.out4_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1779712 }, "/model/layers.25/input_layernorm/output_0.out4_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1785856 }, "/model/layers.25/attn/qk_proj/MatMulNBits/output_0.out5_4_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1792000 }, "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1804288 }, "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1810432 }, "/model/layers.25/post_attention_layernorm/output_3.out4_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1816576 }, "/model/layers.25/post_attention_layernorm/output_0.out4_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1822720 }, "/model/layers.25/mlp/Mul/output_0.out3_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1828864 }, "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1845248 }, "/model/layers.26/input_layernorm/output_3.out4_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1851392 }, "/model/layers.26/input_layernorm/output_0.out4_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1857536 }, "/model/layers.26/attn/qk_proj/MatMulNBits/output_0.out5_4_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1863680 }, "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1875968 }, "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1882112 }, "/model/layers.26/post_attention_layernorm/output_3.out4_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1888256 }, "/model/layers.26/post_attention_layernorm/output_0.out4_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1894400 }, "/model/layers.26/mlp/Mul/output_0.out3_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1900544 }, "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1916928 }, "/model/layers.27/input_layernorm/output_3.out4_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1923072 }, "/model/layers.27/input_layernorm/output_0.out4_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1929216 }, "/model/layers.27/attn/qk_proj/MatMulNBits/output_0.out5_4_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1935360 }, "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1947648 }, "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1953792 }, "/model/layers.27/post_attention_layernorm/output_3.out4_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1959936 }, "/model/layers.27/post_attention_layernorm/output_0.out4_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1966080 }, "/model/layers.27/mlp/Mul/output_0.out3_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 1972224 }, "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1988608 }, "/model/layers.28/input_layernorm/output_3.out4_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1994752 }, "/model/layers.28/input_layernorm/output_0.out4_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2000896 }, "/model/layers.28/attn/qk_proj/MatMulNBits/output_0.out5_4_84": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2007040 }, "/model/layers.28/attn/GroupQueryAttention/output_0.out2_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2019328 }, "/model/layers.28/attn/o_proj/MatMulNBits/output_0.out5_4_85": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2025472 }, "/model/layers.28/post_attention_layernorm/output_3.out4_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2031616 }, "/model/layers.28/post_attention_layernorm/output_0.out4_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2037760 }, "/model/layers.28/mlp/Mul/output_0.out3_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 2043904 }, "/model/layers.28/mlp/down_proj/MatMulNBits/output_0.out5_4_86": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2060288 }, "/model/layers.29/input_layernorm/output_3.out4_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2066432 }, "/model/layers.29/input_layernorm/output_0.out4_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2072576 }, "/model/layers.29/attn/qk_proj/MatMulNBits/output_0.out5_4_87": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2078720 }, "/model/layers.29/attn/GroupQueryAttention/output_0.out2_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2091008 }, "/model/layers.29/attn/o_proj/MatMulNBits/output_0.out5_4_88": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2097152 }, "/model/layers.29/post_attention_layernorm/output_3.out4_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2103296 }, "/model/layers.29/post_attention_layernorm/output_0.out4_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2109440 }, "/model/layers.29/mlp/Mul/output_0.out3_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 2115584 }, "/model/layers.29/mlp/down_proj/MatMulNBits/output_0.out5_4_89": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2131968 }, "/model/layers.30/input_layernorm/output_3.out4_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2138112 }, "/model/layers.30/input_layernorm/output_0.out4_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2144256 }, "/model/layers.30/attn/qk_proj/MatMulNBits/output_0.out5_4_90": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2150400 }, "/model/layers.30/attn/GroupQueryAttention/output_0.out2_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2162688 }, "/model/layers.30/attn/o_proj/MatMulNBits/output_0.out5_4_91": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2168832 }, "/model/layers.30/post_attention_layernorm/output_3.out4_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2174976 }, "/model/layers.30/post_attention_layernorm/output_0.out4_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2181120 }, "/model/layers.30/mlp/Mul/output_0.out3_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 2187264 }, "/model/layers.30/mlp/down_proj/MatMulNBits/output_0.out5_4_92": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2203648 }, "/model/layers.31/input_layernorm/output_3.out4_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2209792 }, "/model/layers.31/input_layernorm/output_0.out4_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2215936 }, "/model/layers.31/attn/qk_proj/MatMulNBits/output_0.out5_4_93": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 6144 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2222080 }, "/model/layers.31/attn/GroupQueryAttention/output_0.out2_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2234368 }, "/model/layers.31/attn/o_proj/MatMulNBits/output_0.out5_4_94": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2240512 }, "/model/layers.31/post_attention_layernorm/output_3.out4_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2246656 }, "/model/layers.31/post_attention_layernorm/output_0.out4_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2252800 }, "/model/layers.31/mlp/Mul/output_0.out3_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8192 ], "size_in_bytes": 16384, "op_tensor_size": 16384, "offset": 2258944 }, "/model/layers.31/mlp/down_proj/MatMulNBits/output_0.out5_4_95": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2275328 }, "/model/layers.32/final_norm_layernorm/output_0.out4_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2281472 }, "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 0, "file_name": ".cache\\MatMulNBits_2_0_0.const", "file_size": 18874368 }, "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 18874368, "file_name": ".cache\\MatMulNBits_2_0_1.const", "file_size": 24576 }, "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 18898944, "file_name": ".cache\\MatMulNBits_2_0_2.const", "file_size": 589824 }, "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 19488768, "file_name": ".cache\\MatMulNBits_2_0_3.const", "file_size": 147456 }, "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 19636224, "file_name": ".cache\\MatMulNBits_2_0_4.const", "file_size": 9437184 }, "model.layers.0.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 29073408, "file_name": ".cache\\MatMulNBits_2_0_5.const", "file_size": 12288 }, "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 29085696, "file_name": ".cache\\MatMulNBits_2_0_6.const", "file_size": 294912 }, "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 29380608, "file_name": ".cache\\MatMulNBits_2_0_7.const", "file_size": 73728 }, "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 29454336, "file_name": ".cache\\MatMulNBits_2_0_8.const", "file_size": 9437184 }, "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 38891520, "file_name": ".cache\\MatMulNBits_2_0_9.const", "file_size": 12288 }, "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 38903808, "file_name": ".cache\\MatMulNBits_2_0_10.const", "file_size": 294912 }, "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 39198720, "file_name": ".cache\\MatMulNBits_2_0_11.const", "file_size": 73728 }, "model.layers.0.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 39272448, "file_name": ".cache\\MatMulNBits_2_0_12.const", "file_size": 6144 }, "model.layers.0.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 39278592, "file_name": ".cache\\MatMulNBits_2_0_13.const", "file_size": 12582912 }, "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 51861504, "file_name": ".cache\\MatMulNBits_2_0_14.const", "file_size": 786432 }, "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 52647936, "file_name": ".cache\\MatMulNBits_2_0_15.const", "file_size": 98304 }, "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 52746240, "file_name": ".cache\\MatMulNBits_2_0_16.const", "file_size": 32768 }, "model.layers.0.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 52779008, "file_name": ".cache\\MatMulNBits_2_0_17.const", "file_size": 12582912 }, "model.layers.0.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 65361920, "file_name": ".cache\\MatMulNBits_2_0_18.const", "file_size": 786432 }, "model.layers.0.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 66148352, "file_name": ".cache\\MatMulNBits_2_0_19.const", "file_size": 98304 }, "model.layers.0.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 66246656, "file_name": ".cache\\MatMulNBits_2_0_20.const", "file_size": 32768 }, "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 66279424, "file_name": ".cache\\MatMulNBits_2_0_21.const", "file_size": 25165824 }, "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 91445248, "file_name": ".cache\\MatMulNBits_2_0_22.const", "file_size": 12288 }, "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 91457536, "file_name": ".cache\\MatMulNBits_2_0_23.const", "file_size": 786432 }, "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 92243968, "file_name": ".cache\\MatMulNBits_2_0_24.const", "file_size": 196608 }, "model.layers.1.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 92440576, "file_name": ".cache\\MatMulNBits_2_0_25.const", "file_size": 6144 }, "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 92446720, "file_name": ".cache\\MatMulNBits_2_0_26.const", "file_size": 18874368 }, "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 111321088, "file_name": ".cache\\MatMulNBits_2_0_27.const", "file_size": 24576 }, "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 111345664, "file_name": ".cache\\MatMulNBits_2_0_28.const", "file_size": 589824 }, "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 111935488, "file_name": ".cache\\MatMulNBits_2_0_29.const", "file_size": 147456 }, "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 112082944, "file_name": ".cache\\MatMulNBits_2_0_30.const", "file_size": 9437184 }, "model.layers.1.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 121520128, "file_name": ".cache\\MatMulNBits_2_0_31.const", "file_size": 12288 }, "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 121532416, "file_name": ".cache\\MatMulNBits_2_0_32.const", "file_size": 294912 }, "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 121827328, "file_name": ".cache\\MatMulNBits_2_0_33.const", "file_size": 73728 }, "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 121901056, "file_name": ".cache\\MatMulNBits_2_0_34.const", "file_size": 9437184 }, "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 131338240, "file_name": ".cache\\MatMulNBits_2_0_35.const", "file_size": 12288 }, "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 131350528, "file_name": ".cache\\MatMulNBits_2_0_36.const", "file_size": 294912 }, "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 131645440, "file_name": ".cache\\MatMulNBits_2_0_37.const", "file_size": 73728 }, "model.layers.1.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 131719168, "file_name": ".cache\\MatMulNBits_2_0_38.const", "file_size": 6144 }, "model.layers.1.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 131725312, "file_name": ".cache\\MatMulNBits_2_0_39.const", "file_size": 12582912 }, "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 144308224, "file_name": ".cache\\MatMulNBits_2_0_40.const", "file_size": 786432 }, "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 145094656, "file_name": ".cache\\MatMulNBits_2_0_41.const", "file_size": 98304 }, "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 145192960, "file_name": ".cache\\MatMulNBits_2_0_42.const", "file_size": 32768 }, "model.layers.1.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 145225728, "file_name": ".cache\\MatMulNBits_2_0_43.const", "file_size": 12582912 }, "model.layers.1.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 157808640, "file_name": ".cache\\MatMulNBits_2_0_44.const", "file_size": 786432 }, "model.layers.1.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 158595072, "file_name": ".cache\\MatMulNBits_2_0_45.const", "file_size": 98304 }, "model.layers.1.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 158693376, "file_name": ".cache\\MatMulNBits_2_0_46.const", "file_size": 32768 }, "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 158726144, "file_name": ".cache\\MatMulNBits_2_0_47.const", "file_size": 25165824 }, "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 183891968, "file_name": ".cache\\MatMulNBits_2_0_48.const", "file_size": 12288 }, "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 183904256, "file_name": ".cache\\MatMulNBits_2_0_49.const", "file_size": 786432 }, "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 184690688, "file_name": ".cache\\MatMulNBits_2_0_50.const", "file_size": 196608 }, "model.layers.2.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 184887296, "file_name": ".cache\\MatMulNBits_2_0_51.const", "file_size": 6144 }, "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 184893440, "file_name": ".cache\\MatMulNBits_2_0_52.const", "file_size": 18874368 }, "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 203767808, "file_name": ".cache\\MatMulNBits_2_0_53.const", "file_size": 24576 }, "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 203792384, "file_name": ".cache\\MatMulNBits_2_0_54.const", "file_size": 589824 }, "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 204382208, "file_name": ".cache\\MatMulNBits_2_0_55.const", "file_size": 147456 }, "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 204529664, "file_name": ".cache\\MatMulNBits_2_0_56.const", "file_size": 9437184 }, "model.layers.2.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 213966848, "file_name": ".cache\\MatMulNBits_2_0_57.const", "file_size": 12288 }, "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 213979136, "file_name": ".cache\\MatMulNBits_2_0_58.const", "file_size": 294912 }, "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 214274048, "file_name": ".cache\\MatMulNBits_2_0_59.const", "file_size": 73728 }, "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 214347776, "file_name": ".cache\\MatMulNBits_2_0_60.const", "file_size": 9437184 }, "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 223784960, "file_name": ".cache\\MatMulNBits_2_0_61.const", "file_size": 12288 }, "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 223797248, "file_name": ".cache\\MatMulNBits_2_0_62.const", "file_size": 294912 }, "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 224092160, "file_name": ".cache\\MatMulNBits_2_0_63.const", "file_size": 73728 }, "model.layers.2.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 224165888, "file_name": ".cache\\MatMulNBits_2_0_64.const", "file_size": 6144 }, "model.layers.2.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 224172032, "file_name": ".cache\\MatMulNBits_2_0_65.const", "file_size": 12582912 }, "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 236754944, "file_name": ".cache\\MatMulNBits_2_0_66.const", "file_size": 786432 }, "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 237541376, "file_name": ".cache\\MatMulNBits_2_0_67.const", "file_size": 98304 }, "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 237639680, "file_name": ".cache\\MatMulNBits_2_0_68.const", "file_size": 32768 }, "model.layers.2.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 237672448, "file_name": ".cache\\MatMulNBits_2_0_69.const", "file_size": 12582912 }, "model.layers.2.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 250255360, "file_name": ".cache\\MatMulNBits_2_0_70.const", "file_size": 786432 }, "model.layers.2.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 251041792, "file_name": ".cache\\MatMulNBits_2_0_71.const", "file_size": 98304 }, "model.layers.2.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 251140096, "file_name": ".cache\\MatMulNBits_2_0_72.const", "file_size": 32768 }, "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 251172864, "file_name": ".cache\\MatMulNBits_2_0_73.const", "file_size": 25165824 }, "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 276338688, "file_name": ".cache\\MatMulNBits_2_0_74.const", "file_size": 12288 }, "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 276350976, "file_name": ".cache\\MatMulNBits_2_0_75.const", "file_size": 786432 }, "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 277137408, "file_name": ".cache\\MatMulNBits_2_0_76.const", "file_size": 196608 }, "model.layers.3.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 277334016, "file_name": ".cache\\MatMulNBits_2_0_77.const", "file_size": 6144 }, "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 277340160, "file_name": ".cache\\MatMulNBits_2_0_78.const", "file_size": 18874368 }, "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 296214528, "file_name": ".cache\\MatMulNBits_2_0_79.const", "file_size": 24576 }, "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 296239104, "file_name": ".cache\\MatMulNBits_2_0_80.const", "file_size": 589824 }, "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 296828928, "file_name": ".cache\\MatMulNBits_2_0_81.const", "file_size": 147456 }, "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 296976384, "file_name": ".cache\\MatMulNBits_2_0_82.const", "file_size": 9437184 }, "model.layers.3.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 306413568, "file_name": ".cache\\MatMulNBits_2_0_83.const", "file_size": 12288 }, "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 306425856, "file_name": ".cache\\MatMulNBits_2_0_84.const", "file_size": 294912 }, "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 306720768, "file_name": ".cache\\MatMulNBits_2_0_85.const", "file_size": 73728 }, "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 306794496, "file_name": ".cache\\MatMulNBits_2_0_86.const", "file_size": 9437184 }, "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 316231680, "file_name": ".cache\\MatMulNBits_2_0_87.const", "file_size": 12288 }, "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 316243968, "file_name": ".cache\\MatMulNBits_2_0_88.const", "file_size": 294912 }, "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 316538880, "file_name": ".cache\\MatMulNBits_2_0_89.const", "file_size": 73728 }, "model.layers.3.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 316612608, "file_name": ".cache\\MatMulNBits_2_0_90.const", "file_size": 6144 }, "model.layers.3.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 316618752, "file_name": ".cache\\MatMulNBits_2_0_91.const", "file_size": 12582912 }, "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 329201664, "file_name": ".cache\\MatMulNBits_2_0_92.const", "file_size": 786432 }, "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 329988096, "file_name": ".cache\\MatMulNBits_2_0_93.const", "file_size": 98304 }, "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 330086400, "file_name": ".cache\\MatMulNBits_2_0_94.const", "file_size": 32768 }, "model.layers.3.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 330119168, "file_name": ".cache\\MatMulNBits_2_0_95.const", "file_size": 12582912 }, "model.layers.3.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 342702080, "file_name": ".cache\\MatMulNBits_2_0_96.const", "file_size": 786432 }, "model.layers.3.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 343488512, "file_name": ".cache\\MatMulNBits_2_0_97.const", "file_size": 98304 }, "model.layers.3.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 343586816, "file_name": ".cache\\MatMulNBits_2_0_98.const", "file_size": 32768 }, "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 343619584, "file_name": ".cache\\MatMulNBits_2_0_99.const", "file_size": 25165824 }, "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 368785408, "file_name": ".cache\\MatMulNBits_2_0_100.const", "file_size": 12288 }, "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 368797696, "file_name": ".cache\\MatMulNBits_2_0_101.const", "file_size": 786432 }, "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 369584128, "file_name": ".cache\\MatMulNBits_2_0_102.const", "file_size": 196608 }, "model.layers.4.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 369780736, "file_name": ".cache\\MatMulNBits_2_0_103.const", "file_size": 6144 }, "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 369786880, "file_name": ".cache\\MatMulNBits_2_0_104.const", "file_size": 18874368 }, "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 388661248, "file_name": ".cache\\MatMulNBits_2_0_105.const", "file_size": 24576 }, "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 388685824, "file_name": ".cache\\MatMulNBits_2_0_106.const", "file_size": 589824 }, "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 389275648, "file_name": ".cache\\MatMulNBits_2_0_107.const", "file_size": 147456 }, "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 389423104, "file_name": ".cache\\MatMulNBits_2_0_108.const", "file_size": 9437184 }, "model.layers.4.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 398860288, "file_name": ".cache\\MatMulNBits_2_0_109.const", "file_size": 12288 }, "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 398872576, "file_name": ".cache\\MatMulNBits_2_0_110.const", "file_size": 294912 }, "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 399167488, "file_name": ".cache\\MatMulNBits_2_0_111.const", "file_size": 73728 }, "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 399241216, "file_name": ".cache\\MatMulNBits_2_0_112.const", "file_size": 9437184 }, "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 408678400, "file_name": ".cache\\MatMulNBits_2_0_113.const", "file_size": 12288 }, "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 408690688, "file_name": ".cache\\MatMulNBits_2_0_114.const", "file_size": 294912 }, "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 408985600, "file_name": ".cache\\MatMulNBits_2_0_115.const", "file_size": 73728 }, "model.layers.4.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 409059328, "file_name": ".cache\\MatMulNBits_2_0_116.const", "file_size": 6144 }, "model.layers.4.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 409065472, "file_name": ".cache\\MatMulNBits_2_0_117.const", "file_size": 12582912 }, "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 421648384, "file_name": ".cache\\MatMulNBits_2_0_118.const", "file_size": 786432 }, "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 422434816, "file_name": ".cache\\MatMulNBits_2_0_119.const", "file_size": 98304 }, "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 422533120, "file_name": ".cache\\MatMulNBits_2_0_120.const", "file_size": 32768 }, "model.layers.4.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 422565888, "file_name": ".cache\\MatMulNBits_2_0_121.const", "file_size": 12582912 }, "model.layers.4.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 435148800, "file_name": ".cache\\MatMulNBits_2_0_122.const", "file_size": 786432 }, "model.layers.4.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 435935232, "file_name": ".cache\\MatMulNBits_2_0_123.const", "file_size": 98304 }, "model.layers.4.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 436033536, "file_name": ".cache\\MatMulNBits_2_0_124.const", "file_size": 32768 }, "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 436066304, "file_name": ".cache\\MatMulNBits_2_0_125.const", "file_size": 25165824 }, "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 461232128, "file_name": ".cache\\MatMulNBits_2_0_126.const", "file_size": 12288 }, "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 461244416, "file_name": ".cache\\MatMulNBits_2_0_127.const", "file_size": 786432 }, "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 462030848, "file_name": ".cache\\MatMulNBits_2_0_128.const", "file_size": 196608 }, "model.layers.5.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 462227456, "file_name": ".cache\\MatMulNBits_2_0_129.const", "file_size": 6144 }, "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 462233600, "file_name": ".cache\\MatMulNBits_2_0_130.const", "file_size": 18874368 }, "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 481107968, "file_name": ".cache\\MatMulNBits_2_0_131.const", "file_size": 24576 }, "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 481132544, "file_name": ".cache\\MatMulNBits_2_0_132.const", "file_size": 589824 }, "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 481722368, "file_name": ".cache\\MatMulNBits_2_0_133.const", "file_size": 147456 }, "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 481869824, "file_name": ".cache\\MatMulNBits_2_0_134.const", "file_size": 9437184 }, "model.layers.5.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 491307008, "file_name": ".cache\\MatMulNBits_2_0_135.const", "file_size": 12288 }, "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 491319296, "file_name": ".cache\\MatMulNBits_2_0_136.const", "file_size": 294912 }, "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 491614208, "file_name": ".cache\\MatMulNBits_2_0_137.const", "file_size": 73728 }, "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 491687936, "file_name": ".cache\\MatMulNBits_2_0_138.const", "file_size": 9437184 }, "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 501125120, "file_name": ".cache\\MatMulNBits_2_0_139.const", "file_size": 12288 }, "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 501137408, "file_name": ".cache\\MatMulNBits_2_0_140.const", "file_size": 294912 }, "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 501432320, "file_name": ".cache\\MatMulNBits_2_0_141.const", "file_size": 73728 }, "model.layers.5.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 501506048, "file_name": ".cache\\MatMulNBits_2_0_142.const", "file_size": 6144 }, "model.layers.5.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 501512192, "file_name": ".cache\\MatMulNBits_2_0_143.const", "file_size": 12582912 }, "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 514095104, "file_name": ".cache\\MatMulNBits_2_0_144.const", "file_size": 786432 }, "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 514881536, "file_name": ".cache\\MatMulNBits_2_0_145.const", "file_size": 98304 }, "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 514979840, "file_name": ".cache\\MatMulNBits_2_0_146.const", "file_size": 32768 }, "model.layers.5.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 515012608, "file_name": ".cache\\MatMulNBits_2_0_147.const", "file_size": 12582912 }, "model.layers.5.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 527595520, "file_name": ".cache\\MatMulNBits_2_0_148.const", "file_size": 786432 }, "model.layers.5.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 528381952, "file_name": ".cache\\MatMulNBits_2_0_149.const", "file_size": 98304 }, "model.layers.5.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 528480256, "file_name": ".cache\\MatMulNBits_2_0_150.const", "file_size": 32768 }, "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 528513024, "file_name": ".cache\\MatMulNBits_2_0_151.const", "file_size": 25165824 }, "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 553678848, "file_name": ".cache\\MatMulNBits_2_0_152.const", "file_size": 12288 }, "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 553691136, "file_name": ".cache\\MatMulNBits_2_0_153.const", "file_size": 786432 }, "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 554477568, "file_name": ".cache\\MatMulNBits_2_0_154.const", "file_size": 196608 }, "model.layers.6.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 554674176, "file_name": ".cache\\MatMulNBits_2_0_155.const", "file_size": 6144 }, "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 554680320, "file_name": ".cache\\MatMulNBits_2_0_156.const", "file_size": 18874368 }, "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 573554688, "file_name": ".cache\\MatMulNBits_2_0_157.const", "file_size": 24576 }, "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 573579264, "file_name": ".cache\\MatMulNBits_2_0_158.const", "file_size": 589824 }, "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 574169088, "file_name": ".cache\\MatMulNBits_2_0_159.const", "file_size": 147456 }, "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 574316544, "file_name": ".cache\\MatMulNBits_2_0_160.const", "file_size": 9437184 }, "model.layers.6.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 583753728, "file_name": ".cache\\MatMulNBits_2_0_161.const", "file_size": 12288 }, "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 583766016, "file_name": ".cache\\MatMulNBits_2_0_162.const", "file_size": 294912 }, "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 584060928, "file_name": ".cache\\MatMulNBits_2_0_163.const", "file_size": 73728 }, "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 584134656, "file_name": ".cache\\MatMulNBits_2_0_164.const", "file_size": 9437184 }, "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 593571840, "file_name": ".cache\\MatMulNBits_2_0_165.const", "file_size": 12288 }, "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 593584128, "file_name": ".cache\\MatMulNBits_2_0_166.const", "file_size": 294912 }, "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 593879040, "file_name": ".cache\\MatMulNBits_2_0_167.const", "file_size": 73728 }, "model.layers.6.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 593952768, "file_name": ".cache\\MatMulNBits_2_0_168.const", "file_size": 6144 }, "model.layers.6.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 593958912, "file_name": ".cache\\MatMulNBits_2_0_169.const", "file_size": 12582912 }, "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 606541824, "file_name": ".cache\\MatMulNBits_2_0_170.const", "file_size": 786432 }, "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 607328256, "file_name": ".cache\\MatMulNBits_2_0_171.const", "file_size": 98304 }, "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 607426560, "file_name": ".cache\\MatMulNBits_2_0_172.const", "file_size": 32768 }, "model.layers.6.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 607459328, "file_name": ".cache\\MatMulNBits_2_0_173.const", "file_size": 12582912 }, "model.layers.6.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 620042240, "file_name": ".cache\\MatMulNBits_2_0_174.const", "file_size": 786432 }, "model.layers.6.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 620828672, "file_name": ".cache\\MatMulNBits_2_0_175.const", "file_size": 98304 }, "model.layers.6.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 620926976, "file_name": ".cache\\MatMulNBits_2_0_176.const", "file_size": 32768 }, "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 620959744, "file_name": ".cache\\MatMulNBits_2_0_177.const", "file_size": 25165824 }, "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 646125568, "file_name": ".cache\\MatMulNBits_2_0_178.const", "file_size": 12288 }, "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 646137856, "file_name": ".cache\\MatMulNBits_2_0_179.const", "file_size": 786432 }, "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 646924288, "file_name": ".cache\\MatMulNBits_2_0_180.const", "file_size": 196608 }, "model.layers.7.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 647120896, "file_name": ".cache\\MatMulNBits_2_0_181.const", "file_size": 6144 }, "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 647127040, "file_name": ".cache\\MatMulNBits_2_0_182.const", "file_size": 18874368 }, "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 666001408, "file_name": ".cache\\MatMulNBits_2_0_183.const", "file_size": 24576 }, "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 666025984, "file_name": ".cache\\MatMulNBits_2_0_184.const", "file_size": 589824 }, "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 666615808, "file_name": ".cache\\MatMulNBits_2_0_185.const", "file_size": 147456 }, "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 666763264, "file_name": ".cache\\MatMulNBits_2_0_186.const", "file_size": 9437184 }, "model.layers.7.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 676200448, "file_name": ".cache\\MatMulNBits_2_0_187.const", "file_size": 12288 }, "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 676212736, "file_name": ".cache\\MatMulNBits_2_0_188.const", "file_size": 294912 }, "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 676507648, "file_name": ".cache\\MatMulNBits_2_0_189.const", "file_size": 73728 }, "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 676581376, "file_name": ".cache\\MatMulNBits_2_0_190.const", "file_size": 9437184 }, "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 686018560, "file_name": ".cache\\MatMulNBits_2_0_191.const", "file_size": 12288 }, "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 686030848, "file_name": ".cache\\MatMulNBits_2_0_192.const", "file_size": 294912 }, "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 686325760, "file_name": ".cache\\MatMulNBits_2_0_193.const", "file_size": 73728 }, "model.layers.7.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 686399488, "file_name": ".cache\\MatMulNBits_2_0_194.const", "file_size": 6144 }, "model.layers.7.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 686405632, "file_name": ".cache\\MatMulNBits_2_0_195.const", "file_size": 12582912 }, "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 698988544, "file_name": ".cache\\MatMulNBits_2_0_196.const", "file_size": 786432 }, "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 699774976, "file_name": ".cache\\MatMulNBits_2_0_197.const", "file_size": 98304 }, "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 699873280, "file_name": ".cache\\MatMulNBits_2_0_198.const", "file_size": 32768 }, "model.layers.7.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 699906048, "file_name": ".cache\\MatMulNBits_2_0_199.const", "file_size": 12582912 }, "model.layers.7.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 712488960, "file_name": ".cache\\MatMulNBits_2_0_200.const", "file_size": 786432 }, "model.layers.7.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 713275392, "file_name": ".cache\\MatMulNBits_2_0_201.const", "file_size": 98304 }, "model.layers.7.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 713373696, "file_name": ".cache\\MatMulNBits_2_0_202.const", "file_size": 32768 }, "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 713406464, "file_name": ".cache\\MatMulNBits_2_0_203.const", "file_size": 25165824 }, "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 738572288, "file_name": ".cache\\MatMulNBits_2_0_204.const", "file_size": 12288 }, "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 738584576, "file_name": ".cache\\MatMulNBits_2_0_205.const", "file_size": 786432 }, "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 739371008, "file_name": ".cache\\MatMulNBits_2_0_206.const", "file_size": 196608 }, "model.layers.8.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 739567616, "file_name": ".cache\\MatMulNBits_2_0_207.const", "file_size": 6144 }, "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 739573760, "file_name": ".cache\\MatMulNBits_2_0_208.const", "file_size": 18874368 }, "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 758448128, "file_name": ".cache\\MatMulNBits_2_0_209.const", "file_size": 24576 }, "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 758472704, "file_name": ".cache\\MatMulNBits_2_0_210.const", "file_size": 589824 }, "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 759062528, "file_name": ".cache\\MatMulNBits_2_0_211.const", "file_size": 147456 }, "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 759209984, "file_name": ".cache\\MatMulNBits_2_0_212.const", "file_size": 9437184 }, "model.layers.8.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 768647168, "file_name": ".cache\\MatMulNBits_2_0_213.const", "file_size": 12288 }, "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 768659456, "file_name": ".cache\\MatMulNBits_2_0_214.const", "file_size": 294912 }, "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 768954368, "file_name": ".cache\\MatMulNBits_2_0_215.const", "file_size": 73728 }, "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 769028096, "file_name": ".cache\\MatMulNBits_2_0_216.const", "file_size": 9437184 }, "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 778465280, "file_name": ".cache\\MatMulNBits_2_0_217.const", "file_size": 12288 }, "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 778477568, "file_name": ".cache\\MatMulNBits_2_0_218.const", "file_size": 294912 }, "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 778772480, "file_name": ".cache\\MatMulNBits_2_0_219.const", "file_size": 73728 }, "model.layers.8.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 778846208, "file_name": ".cache\\MatMulNBits_2_0_220.const", "file_size": 6144 }, "model.layers.8.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 778852352, "file_name": ".cache\\MatMulNBits_2_0_221.const", "file_size": 12582912 }, "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 791435264, "file_name": ".cache\\MatMulNBits_2_0_222.const", "file_size": 786432 }, "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 792221696, "file_name": ".cache\\MatMulNBits_2_0_223.const", "file_size": 98304 }, "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 792320000, "file_name": ".cache\\MatMulNBits_2_0_224.const", "file_size": 32768 }, "model.layers.8.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 792352768, "file_name": ".cache\\MatMulNBits_2_0_225.const", "file_size": 12582912 }, "model.layers.8.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 804935680, "file_name": ".cache\\MatMulNBits_2_0_226.const", "file_size": 786432 }, "model.layers.8.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 805722112, "file_name": ".cache\\MatMulNBits_2_0_227.const", "file_size": 98304 }, "model.layers.8.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 805820416, "file_name": ".cache\\MatMulNBits_2_0_228.const", "file_size": 32768 }, "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 805853184, "file_name": ".cache\\MatMulNBits_2_0_229.const", "file_size": 25165824 }, "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 831019008, "file_name": ".cache\\MatMulNBits_2_0_230.const", "file_size": 12288 }, "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 831031296, "file_name": ".cache\\MatMulNBits_2_0_231.const", "file_size": 786432 }, "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 831817728, "file_name": ".cache\\MatMulNBits_2_0_232.const", "file_size": 196608 }, "model.layers.9.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 832014336, "file_name": ".cache\\MatMulNBits_2_0_233.const", "file_size": 6144 }, "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 832020480, "file_name": ".cache\\MatMulNBits_2_0_234.const", "file_size": 18874368 }, "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 850894848, "file_name": ".cache\\MatMulNBits_2_0_235.const", "file_size": 24576 }, "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 850919424, "file_name": ".cache\\MatMulNBits_2_0_236.const", "file_size": 589824 }, "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 851509248, "file_name": ".cache\\MatMulNBits_2_0_237.const", "file_size": 147456 }, "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 851656704, "file_name": ".cache\\MatMulNBits_2_0_238.const", "file_size": 9437184 }, "model.layers.9.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 861093888, "file_name": ".cache\\MatMulNBits_2_0_239.const", "file_size": 12288 }, "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 861106176, "file_name": ".cache\\MatMulNBits_2_0_240.const", "file_size": 294912 }, "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 861401088, "file_name": ".cache\\MatMulNBits_2_0_241.const", "file_size": 73728 }, "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 861474816, "file_name": ".cache\\MatMulNBits_2_0_242.const", "file_size": 9437184 }, "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 870912000, "file_name": ".cache\\MatMulNBits_2_0_243.const", "file_size": 12288 }, "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 870924288, "file_name": ".cache\\MatMulNBits_2_0_244.const", "file_size": 294912 }, "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 871219200, "file_name": ".cache\\MatMulNBits_2_0_245.const", "file_size": 73728 }, "model.layers.9.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 871292928, "file_name": ".cache\\MatMulNBits_2_0_246.const", "file_size": 6144 }, "model.layers.9.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 871299072, "file_name": ".cache\\MatMulNBits_2_0_247.const", "file_size": 12582912 }, "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 883881984, "file_name": ".cache\\MatMulNBits_2_0_248.const", "file_size": 786432 }, "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 884668416, "file_name": ".cache\\MatMulNBits_2_0_249.const", "file_size": 98304 }, "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 884766720, "file_name": ".cache\\MatMulNBits_2_0_250.const", "file_size": 32768 }, "model.layers.9.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 884799488, "file_name": ".cache\\MatMulNBits_2_0_251.const", "file_size": 12582912 }, "model.layers.9.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 897382400, "file_name": ".cache\\MatMulNBits_2_0_252.const", "file_size": 786432 }, "model.layers.9.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 898168832, "file_name": ".cache\\MatMulNBits_2_0_253.const", "file_size": 98304 }, "model.layers.9.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 898267136, "file_name": ".cache\\MatMulNBits_2_0_254.const", "file_size": 32768 }, "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 898299904, "file_name": ".cache\\MatMulNBits_2_0_255.const", "file_size": 25165824 }, "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 923465728, "file_name": ".cache\\MatMulNBits_2_0_256.const", "file_size": 12288 }, "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 923478016, "file_name": ".cache\\MatMulNBits_2_0_257.const", "file_size": 786432 }, "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 924264448, "file_name": ".cache\\MatMulNBits_2_0_258.const", "file_size": 196608 }, "model.layers.10.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 924461056, "file_name": ".cache\\MatMulNBits_2_0_259.const", "file_size": 6144 }, "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 924467200, "file_name": ".cache\\MatMulNBits_2_0_260.const", "file_size": 18874368 }, "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 943341568, "file_name": ".cache\\MatMulNBits_2_0_261.const", "file_size": 24576 }, "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 943366144, "file_name": ".cache\\MatMulNBits_2_0_262.const", "file_size": 589824 }, "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 943955968, "file_name": ".cache\\MatMulNBits_2_0_263.const", "file_size": 147456 }, "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 944103424, "file_name": ".cache\\MatMulNBits_2_0_264.const", "file_size": 9437184 }, "model.layers.10.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 953540608, "file_name": ".cache\\MatMulNBits_2_0_265.const", "file_size": 12288 }, "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 953552896, "file_name": ".cache\\MatMulNBits_2_0_266.const", "file_size": 294912 }, "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 953847808, "file_name": ".cache\\MatMulNBits_2_0_267.const", "file_size": 73728 }, "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 953921536, "file_name": ".cache\\MatMulNBits_2_0_268.const", "file_size": 9437184 }, "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 963358720, "file_name": ".cache\\MatMulNBits_2_0_269.const", "file_size": 12288 }, "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 963371008, "file_name": ".cache\\MatMulNBits_2_0_270.const", "file_size": 294912 }, "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 963665920, "file_name": ".cache\\MatMulNBits_2_0_271.const", "file_size": 73728 }, "model.layers.10.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 963739648, "file_name": ".cache\\MatMulNBits_2_0_272.const", "file_size": 6144 }, "model.layers.10.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 963745792, "file_name": ".cache\\MatMulNBits_2_0_273.const", "file_size": 12582912 }, "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 976328704, "file_name": ".cache\\MatMulNBits_2_0_274.const", "file_size": 786432 }, "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 977115136, "file_name": ".cache\\MatMulNBits_2_0_275.const", "file_size": 98304 }, "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 977213440, "file_name": ".cache\\MatMulNBits_2_0_276.const", "file_size": 32768 }, "model.layers.10.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 977246208, "file_name": ".cache\\MatMulNBits_2_0_277.const", "file_size": 12582912 }, "model.layers.10.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 989829120, "file_name": ".cache\\MatMulNBits_2_0_278.const", "file_size": 786432 }, "model.layers.10.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 990615552, "file_name": ".cache\\MatMulNBits_2_0_279.const", "file_size": 98304 }, "model.layers.10.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 990713856, "file_name": ".cache\\MatMulNBits_2_0_280.const", "file_size": 32768 }, "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 990746624, "file_name": ".cache\\MatMulNBits_2_0_281.const", "file_size": 25165824 }, "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1015912448, "file_name": ".cache\\MatMulNBits_2_0_282.const", "file_size": 12288 }, "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1015924736, "file_name": ".cache\\MatMulNBits_2_0_283.const", "file_size": 786432 }, "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1016711168, "file_name": ".cache\\MatMulNBits_2_0_284.const", "file_size": 196608 }, "model.layers.11.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1016907776, "file_name": ".cache\\MatMulNBits_2_0_285.const", "file_size": 6144 }, "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1016913920, "file_name": ".cache\\MatMulNBits_2_0_286.const", "file_size": 18874368 }, "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1035788288, "file_name": ".cache\\MatMulNBits_2_0_287.const", "file_size": 24576 }, "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1035812864, "file_name": ".cache\\MatMulNBits_2_0_288.const", "file_size": 589824 }, "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1036402688, "file_name": ".cache\\MatMulNBits_2_0_289.const", "file_size": 147456 }, "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1036550144, "file_name": ".cache\\MatMulNBits_2_0_290.const", "file_size": 9437184 }, "model.layers.11.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1045987328, "file_name": ".cache\\MatMulNBits_2_0_291.const", "file_size": 12288 }, "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1045999616, "file_name": ".cache\\MatMulNBits_2_0_292.const", "file_size": 294912 }, "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1046294528, "file_name": ".cache\\MatMulNBits_2_0_293.const", "file_size": 73728 }, "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1046368256, "file_name": ".cache\\MatMulNBits_2_0_294.const", "file_size": 9437184 }, "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1055805440, "file_name": ".cache\\MatMulNBits_2_0_295.const", "file_size": 12288 }, "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1055817728, "file_name": ".cache\\MatMulNBits_2_0_296.const", "file_size": 294912 }, "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1056112640, "file_name": ".cache\\MatMulNBits_2_0_297.const", "file_size": 73728 }, "model.layers.11.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1056186368, "file_name": ".cache\\MatMulNBits_2_0_298.const", "file_size": 6144 }, "model.layers.11.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1056192512, "file_name": ".cache\\MatMulNBits_2_0_299.const", "file_size": 12582912 }, "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1068775424, "file_name": ".cache\\MatMulNBits_2_0_300.const", "file_size": 786432 }, "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1069561856, "file_name": ".cache\\MatMulNBits_2_0_301.const", "file_size": 98304 }, "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1069660160, "file_name": ".cache\\MatMulNBits_2_0_302.const", "file_size": 32768 }, "model.layers.11.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1069692928, "file_name": ".cache\\MatMulNBits_2_0_303.const", "file_size": 12582912 }, "model.layers.11.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1082275840, "file_name": ".cache\\MatMulNBits_2_0_304.const", "file_size": 786432 }, "model.layers.11.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1083062272, "file_name": ".cache\\MatMulNBits_2_0_305.const", "file_size": 98304 }, "model.layers.11.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1083160576, "file_name": ".cache\\MatMulNBits_2_0_306.const", "file_size": 32768 }, "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1083193344, "file_name": ".cache\\MatMulNBits_2_0_307.const", "file_size": 25165824 }, "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1108359168, "file_name": ".cache\\MatMulNBits_2_0_308.const", "file_size": 12288 }, "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1108371456, "file_name": ".cache\\MatMulNBits_2_0_309.const", "file_size": 786432 }, "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1109157888, "file_name": ".cache\\MatMulNBits_2_0_310.const", "file_size": 196608 }, "model.layers.12.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1109354496, "file_name": ".cache\\MatMulNBits_2_0_311.const", "file_size": 6144 }, "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1109360640, "file_name": ".cache\\MatMulNBits_2_0_312.const", "file_size": 18874368 }, "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1128235008, "file_name": ".cache\\MatMulNBits_2_0_313.const", "file_size": 24576 }, "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1128259584, "file_name": ".cache\\MatMulNBits_2_0_314.const", "file_size": 589824 }, "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1128849408, "file_name": ".cache\\MatMulNBits_2_0_315.const", "file_size": 147456 }, "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1128996864, "file_name": ".cache\\MatMulNBits_2_0_316.const", "file_size": 9437184 }, "model.layers.12.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1138434048, "file_name": ".cache\\MatMulNBits_2_0_317.const", "file_size": 12288 }, "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1138446336, "file_name": ".cache\\MatMulNBits_2_0_318.const", "file_size": 294912 }, "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1138741248, "file_name": ".cache\\MatMulNBits_2_0_319.const", "file_size": 73728 }, "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1138814976, "file_name": ".cache\\MatMulNBits_2_0_320.const", "file_size": 9437184 }, "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1148252160, "file_name": ".cache\\MatMulNBits_2_0_321.const", "file_size": 12288 }, "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1148264448, "file_name": ".cache\\MatMulNBits_2_0_322.const", "file_size": 294912 }, "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1148559360, "file_name": ".cache\\MatMulNBits_2_0_323.const", "file_size": 73728 }, "model.layers.12.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1148633088, "file_name": ".cache\\MatMulNBits_2_0_324.const", "file_size": 6144 }, "model.layers.12.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1148639232, "file_name": ".cache\\MatMulNBits_2_0_325.const", "file_size": 12582912 }, "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1161222144, "file_name": ".cache\\MatMulNBits_2_0_326.const", "file_size": 786432 }, "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1162008576, "file_name": ".cache\\MatMulNBits_2_0_327.const", "file_size": 98304 }, "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1162106880, "file_name": ".cache\\MatMulNBits_2_0_328.const", "file_size": 32768 }, "model.layers.12.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1162139648, "file_name": ".cache\\MatMulNBits_2_0_329.const", "file_size": 12582912 }, "model.layers.12.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1174722560, "file_name": ".cache\\MatMulNBits_2_0_330.const", "file_size": 786432 }, "model.layers.12.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1175508992, "file_name": ".cache\\MatMulNBits_2_0_331.const", "file_size": 98304 }, "model.layers.12.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1175607296, "file_name": ".cache\\MatMulNBits_2_0_332.const", "file_size": 32768 }, "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1175640064, "file_name": ".cache\\MatMulNBits_2_0_333.const", "file_size": 25165824 }, "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1200805888, "file_name": ".cache\\MatMulNBits_2_0_334.const", "file_size": 12288 }, "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1200818176, "file_name": ".cache\\MatMulNBits_2_0_335.const", "file_size": 786432 }, "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1201604608, "file_name": ".cache\\MatMulNBits_2_0_336.const", "file_size": 196608 }, "model.layers.13.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1201801216, "file_name": ".cache\\MatMulNBits_2_0_337.const", "file_size": 6144 }, "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1201807360, "file_name": ".cache\\MatMulNBits_2_0_338.const", "file_size": 18874368 }, "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1220681728, "file_name": ".cache\\MatMulNBits_2_0_339.const", "file_size": 24576 }, "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1220706304, "file_name": ".cache\\MatMulNBits_2_0_340.const", "file_size": 589824 }, "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1221296128, "file_name": ".cache\\MatMulNBits_2_0_341.const", "file_size": 147456 }, "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1221443584, "file_name": ".cache\\MatMulNBits_2_0_342.const", "file_size": 9437184 }, "model.layers.13.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1230880768, "file_name": ".cache\\MatMulNBits_2_0_343.const", "file_size": 12288 }, "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1230893056, "file_name": ".cache\\MatMulNBits_2_0_344.const", "file_size": 294912 }, "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1231187968, "file_name": ".cache\\MatMulNBits_2_0_345.const", "file_size": 73728 }, "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1231261696, "file_name": ".cache\\MatMulNBits_2_0_346.const", "file_size": 9437184 }, "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1240698880, "file_name": ".cache\\MatMulNBits_2_0_347.const", "file_size": 12288 }, "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1240711168, "file_name": ".cache\\MatMulNBits_2_0_348.const", "file_size": 294912 }, "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1241006080, "file_name": ".cache\\MatMulNBits_2_0_349.const", "file_size": 73728 }, "model.layers.13.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1241079808, "file_name": ".cache\\MatMulNBits_2_0_350.const", "file_size": 6144 }, "model.layers.13.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1241085952, "file_name": ".cache\\MatMulNBits_2_0_351.const", "file_size": 12582912 }, "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1253668864, "file_name": ".cache\\MatMulNBits_2_0_352.const", "file_size": 786432 }, "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1254455296, "file_name": ".cache\\MatMulNBits_2_0_353.const", "file_size": 98304 }, "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1254553600, "file_name": ".cache\\MatMulNBits_2_0_354.const", "file_size": 32768 }, "model.layers.13.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1254586368, "file_name": ".cache\\MatMulNBits_2_0_355.const", "file_size": 12582912 }, "model.layers.13.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1267169280, "file_name": ".cache\\MatMulNBits_2_0_356.const", "file_size": 786432 }, "model.layers.13.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1267955712, "file_name": ".cache\\MatMulNBits_2_0_357.const", "file_size": 98304 }, "model.layers.13.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1268054016, "file_name": ".cache\\MatMulNBits_2_0_358.const", "file_size": 32768 }, "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1268086784, "file_name": ".cache\\MatMulNBits_2_0_359.const", "file_size": 25165824 }, "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1293252608, "file_name": ".cache\\MatMulNBits_2_0_360.const", "file_size": 12288 }, "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1293264896, "file_name": ".cache\\MatMulNBits_2_0_361.const", "file_size": 786432 }, "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1294051328, "file_name": ".cache\\MatMulNBits_2_0_362.const", "file_size": 196608 }, "model.layers.14.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1294247936, "file_name": ".cache\\MatMulNBits_2_0_363.const", "file_size": 6144 }, "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1294254080, "file_name": ".cache\\MatMulNBits_2_0_364.const", "file_size": 18874368 }, "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1313128448, "file_name": ".cache\\MatMulNBits_2_0_365.const", "file_size": 24576 }, "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1313153024, "file_name": ".cache\\MatMulNBits_2_0_366.const", "file_size": 589824 }, "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1313742848, "file_name": ".cache\\MatMulNBits_2_0_367.const", "file_size": 147456 }, "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1313890304, "file_name": ".cache\\MatMulNBits_2_0_368.const", "file_size": 9437184 }, "model.layers.14.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1323327488, "file_name": ".cache\\MatMulNBits_2_0_369.const", "file_size": 12288 }, "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1323339776, "file_name": ".cache\\MatMulNBits_2_0_370.const", "file_size": 294912 }, "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1323634688, "file_name": ".cache\\MatMulNBits_2_0_371.const", "file_size": 73728 }, "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1323708416, "file_name": ".cache\\MatMulNBits_2_0_372.const", "file_size": 9437184 }, "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1333145600, "file_name": ".cache\\MatMulNBits_2_0_373.const", "file_size": 12288 }, "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1333157888, "file_name": ".cache\\MatMulNBits_2_0_374.const", "file_size": 294912 }, "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1333452800, "file_name": ".cache\\MatMulNBits_2_0_375.const", "file_size": 73728 }, "model.layers.14.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1333526528, "file_name": ".cache\\MatMulNBits_2_0_376.const", "file_size": 6144 }, "model.layers.14.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1333532672, "file_name": ".cache\\MatMulNBits_2_0_377.const", "file_size": 12582912 }, "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1346115584, "file_name": ".cache\\MatMulNBits_2_0_378.const", "file_size": 786432 }, "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1346902016, "file_name": ".cache\\MatMulNBits_2_0_379.const", "file_size": 98304 }, "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1347000320, "file_name": ".cache\\MatMulNBits_2_0_380.const", "file_size": 32768 }, "model.layers.14.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1347033088, "file_name": ".cache\\MatMulNBits_2_0_381.const", "file_size": 12582912 }, "model.layers.14.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1359616000, "file_name": ".cache\\MatMulNBits_2_0_382.const", "file_size": 786432 }, "model.layers.14.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1360402432, "file_name": ".cache\\MatMulNBits_2_0_383.const", "file_size": 98304 }, "model.layers.14.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1360500736, "file_name": ".cache\\MatMulNBits_2_0_384.const", "file_size": 32768 }, "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1360533504, "file_name": ".cache\\MatMulNBits_2_0_385.const", "file_size": 25165824 }, "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1385699328, "file_name": ".cache\\MatMulNBits_2_0_386.const", "file_size": 12288 }, "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1385711616, "file_name": ".cache\\MatMulNBits_2_0_387.const", "file_size": 786432 }, "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1386498048, "file_name": ".cache\\MatMulNBits_2_0_388.const", "file_size": 196608 }, "model.layers.15.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1386694656, "file_name": ".cache\\MatMulNBits_2_0_389.const", "file_size": 6144 }, "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1386700800, "file_name": ".cache\\MatMulNBits_2_0_390.const", "file_size": 18874368 }, "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1405575168, "file_name": ".cache\\MatMulNBits_2_0_391.const", "file_size": 24576 }, "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1405599744, "file_name": ".cache\\MatMulNBits_2_0_392.const", "file_size": 589824 }, "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1406189568, "file_name": ".cache\\MatMulNBits_2_0_393.const", "file_size": 147456 }, "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1406337024, "file_name": ".cache\\MatMulNBits_2_0_394.const", "file_size": 9437184 }, "model.layers.15.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1415774208, "file_name": ".cache\\MatMulNBits_2_0_395.const", "file_size": 12288 }, "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1415786496, "file_name": ".cache\\MatMulNBits_2_0_396.const", "file_size": 294912 }, "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1416081408, "file_name": ".cache\\MatMulNBits_2_0_397.const", "file_size": 73728 }, "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1416155136, "file_name": ".cache\\MatMulNBits_2_0_398.const", "file_size": 9437184 }, "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1425592320, "file_name": ".cache\\MatMulNBits_2_0_399.const", "file_size": 12288 }, "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1425604608, "file_name": ".cache\\MatMulNBits_2_0_400.const", "file_size": 294912 }, "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1425899520, "file_name": ".cache\\MatMulNBits_2_0_401.const", "file_size": 73728 }, "model.layers.15.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1425973248, "file_name": ".cache\\MatMulNBits_2_0_402.const", "file_size": 6144 }, "model.layers.15.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1425979392, "file_name": ".cache\\MatMulNBits_2_0_403.const", "file_size": 12582912 }, "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1438562304, "file_name": ".cache\\MatMulNBits_2_0_404.const", "file_size": 786432 }, "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1439348736, "file_name": ".cache\\MatMulNBits_2_0_405.const", "file_size": 98304 }, "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1439447040, "file_name": ".cache\\MatMulNBits_2_0_406.const", "file_size": 32768 }, "model.layers.15.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1439479808, "file_name": ".cache\\MatMulNBits_2_0_407.const", "file_size": 12582912 }, "model.layers.15.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1452062720, "file_name": ".cache\\MatMulNBits_2_0_408.const", "file_size": 786432 }, "model.layers.15.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1452849152, "file_name": ".cache\\MatMulNBits_2_0_409.const", "file_size": 98304 }, "model.layers.15.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1452947456, "file_name": ".cache\\MatMulNBits_2_0_410.const", "file_size": 32768 }, "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1452980224, "file_name": ".cache\\MatMulNBits_2_0_411.const", "file_size": 25165824 }, "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1478146048, "file_name": ".cache\\MatMulNBits_2_0_412.const", "file_size": 12288 }, "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1478158336, "file_name": ".cache\\MatMulNBits_2_0_413.const", "file_size": 786432 }, "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1478944768, "file_name": ".cache\\MatMulNBits_2_0_414.const", "file_size": 196608 }, "model.layers.16.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1479141376, "file_name": ".cache\\MatMulNBits_2_0_415.const", "file_size": 6144 }, "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1479147520, "file_name": ".cache\\MatMulNBits_2_0_416.const", "file_size": 18874368 }, "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1498021888, "file_name": ".cache\\MatMulNBits_2_0_417.const", "file_size": 24576 }, "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1498046464, "file_name": ".cache\\MatMulNBits_2_0_418.const", "file_size": 589824 }, "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1498636288, "file_name": ".cache\\MatMulNBits_2_0_419.const", "file_size": 147456 }, "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1498783744, "file_name": ".cache\\MatMulNBits_2_0_420.const", "file_size": 9437184 }, "model.layers.16.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1508220928, "file_name": ".cache\\MatMulNBits_2_0_421.const", "file_size": 12288 }, "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1508233216, "file_name": ".cache\\MatMulNBits_2_0_422.const", "file_size": 294912 }, "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1508528128, "file_name": ".cache\\MatMulNBits_2_0_423.const", "file_size": 73728 }, "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1508601856, "file_name": ".cache\\MatMulNBits_2_0_424.const", "file_size": 9437184 }, "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1518039040, "file_name": ".cache\\MatMulNBits_2_0_425.const", "file_size": 12288 }, "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1518051328, "file_name": ".cache\\MatMulNBits_2_0_426.const", "file_size": 294912 }, "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1518346240, "file_name": ".cache\\MatMulNBits_2_0_427.const", "file_size": 73728 }, "model.layers.16.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1518419968, "file_name": ".cache\\MatMulNBits_2_0_428.const", "file_size": 6144 }, "model.layers.16.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1518426112, "file_name": ".cache\\MatMulNBits_2_0_429.const", "file_size": 12582912 }, "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1531009024, "file_name": ".cache\\MatMulNBits_2_0_430.const", "file_size": 786432 }, "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1531795456, "file_name": ".cache\\MatMulNBits_2_0_431.const", "file_size": 98304 }, "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1531893760, "file_name": ".cache\\MatMulNBits_2_0_432.const", "file_size": 32768 }, "model.layers.16.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1531926528, "file_name": ".cache\\MatMulNBits_2_0_433.const", "file_size": 12582912 }, "model.layers.16.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1544509440, "file_name": ".cache\\MatMulNBits_2_0_434.const", "file_size": 786432 }, "model.layers.16.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1545295872, "file_name": ".cache\\MatMulNBits_2_0_435.const", "file_size": 98304 }, "model.layers.16.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1545394176, "file_name": ".cache\\MatMulNBits_2_0_436.const", "file_size": 32768 }, "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1545426944, "file_name": ".cache\\MatMulNBits_2_0_437.const", "file_size": 25165824 }, "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1570592768, "file_name": ".cache\\MatMulNBits_2_0_438.const", "file_size": 12288 }, "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1570605056, "file_name": ".cache\\MatMulNBits_2_0_439.const", "file_size": 786432 }, "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1571391488, "file_name": ".cache\\MatMulNBits_2_0_440.const", "file_size": 196608 }, "model.layers.17.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1571588096, "file_name": ".cache\\MatMulNBits_2_0_441.const", "file_size": 6144 }, "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1571594240, "file_name": ".cache\\MatMulNBits_2_0_442.const", "file_size": 18874368 }, "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1590468608, "file_name": ".cache\\MatMulNBits_2_0_443.const", "file_size": 24576 }, "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1590493184, "file_name": ".cache\\MatMulNBits_2_0_444.const", "file_size": 589824 }, "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1591083008, "file_name": ".cache\\MatMulNBits_2_0_445.const", "file_size": 147456 }, "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1591230464, "file_name": ".cache\\MatMulNBits_2_0_446.const", "file_size": 9437184 }, "model.layers.17.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1600667648, "file_name": ".cache\\MatMulNBits_2_0_447.const", "file_size": 12288 }, "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1600679936, "file_name": ".cache\\MatMulNBits_2_0_448.const", "file_size": 294912 }, "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1600974848, "file_name": ".cache\\MatMulNBits_2_0_449.const", "file_size": 73728 }, "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1601048576, "file_name": ".cache\\MatMulNBits_2_0_450.const", "file_size": 9437184 }, "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1610485760, "file_name": ".cache\\MatMulNBits_2_0_451.const", "file_size": 12288 }, "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1610498048, "file_name": ".cache\\MatMulNBits_2_0_452.const", "file_size": 294912 }, "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1610792960, "file_name": ".cache\\MatMulNBits_2_0_453.const", "file_size": 73728 }, "model.layers.17.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1610866688, "file_name": ".cache\\MatMulNBits_2_0_454.const", "file_size": 6144 }, "model.layers.17.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1610872832, "file_name": ".cache\\MatMulNBits_2_0_455.const", "file_size": 12582912 }, "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1623455744, "file_name": ".cache\\MatMulNBits_2_0_456.const", "file_size": 786432 }, "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1624242176, "file_name": ".cache\\MatMulNBits_2_0_457.const", "file_size": 98304 }, "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1624340480, "file_name": ".cache\\MatMulNBits_2_0_458.const", "file_size": 32768 }, "model.layers.17.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1624373248, "file_name": ".cache\\MatMulNBits_2_0_459.const", "file_size": 12582912 }, "model.layers.17.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1636956160, "file_name": ".cache\\MatMulNBits_2_0_460.const", "file_size": 786432 }, "model.layers.17.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1637742592, "file_name": ".cache\\MatMulNBits_2_0_461.const", "file_size": 98304 }, "model.layers.17.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1637840896, "file_name": ".cache\\MatMulNBits_2_0_462.const", "file_size": 32768 }, "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1637873664, "file_name": ".cache\\MatMulNBits_2_0_463.const", "file_size": 25165824 }, "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1663039488, "file_name": ".cache\\MatMulNBits_2_0_464.const", "file_size": 12288 }, "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1663051776, "file_name": ".cache\\MatMulNBits_2_0_465.const", "file_size": 786432 }, "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1663838208, "file_name": ".cache\\MatMulNBits_2_0_466.const", "file_size": 196608 }, "model.layers.18.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1664034816, "file_name": ".cache\\MatMulNBits_2_0_467.const", "file_size": 6144 }, "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1664040960, "file_name": ".cache\\MatMulNBits_2_0_468.const", "file_size": 18874368 }, "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1682915328, "file_name": ".cache\\MatMulNBits_2_0_469.const", "file_size": 24576 }, "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1682939904, "file_name": ".cache\\MatMulNBits_2_0_470.const", "file_size": 589824 }, "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1683529728, "file_name": ".cache\\MatMulNBits_2_0_471.const", "file_size": 147456 }, "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1683677184, "file_name": ".cache\\MatMulNBits_2_0_472.const", "file_size": 9437184 }, "model.layers.18.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1693114368, "file_name": ".cache\\MatMulNBits_2_0_473.const", "file_size": 12288 }, "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1693126656, "file_name": ".cache\\MatMulNBits_2_0_474.const", "file_size": 294912 }, "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1693421568, "file_name": ".cache\\MatMulNBits_2_0_475.const", "file_size": 73728 }, "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1693495296, "file_name": ".cache\\MatMulNBits_2_0_476.const", "file_size": 9437184 }, "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1702932480, "file_name": ".cache\\MatMulNBits_2_0_477.const", "file_size": 12288 }, "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1702944768, "file_name": ".cache\\MatMulNBits_2_0_478.const", "file_size": 294912 }, "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1703239680, "file_name": ".cache\\MatMulNBits_2_0_479.const", "file_size": 73728 }, "model.layers.18.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1703313408, "file_name": ".cache\\MatMulNBits_2_0_480.const", "file_size": 6144 }, "model.layers.18.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1703319552, "file_name": ".cache\\MatMulNBits_2_0_481.const", "file_size": 12582912 }, "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1715902464, "file_name": ".cache\\MatMulNBits_2_0_482.const", "file_size": 786432 }, "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1716688896, "file_name": ".cache\\MatMulNBits_2_0_483.const", "file_size": 98304 }, "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1716787200, "file_name": ".cache\\MatMulNBits_2_0_484.const", "file_size": 32768 }, "model.layers.18.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1716819968, "file_name": ".cache\\MatMulNBits_2_0_485.const", "file_size": 12582912 }, "model.layers.18.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1729402880, "file_name": ".cache\\MatMulNBits_2_0_486.const", "file_size": 786432 }, "model.layers.18.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1730189312, "file_name": ".cache\\MatMulNBits_2_0_487.const", "file_size": 98304 }, "model.layers.18.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1730287616, "file_name": ".cache\\MatMulNBits_2_0_488.const", "file_size": 32768 }, "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1730320384, "file_name": ".cache\\MatMulNBits_2_0_489.const", "file_size": 25165824 }, "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1755486208, "file_name": ".cache\\MatMulNBits_2_0_490.const", "file_size": 12288 }, "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1755498496, "file_name": ".cache\\MatMulNBits_2_0_491.const", "file_size": 786432 }, "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1756284928, "file_name": ".cache\\MatMulNBits_2_0_492.const", "file_size": 196608 }, "model.layers.19.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1756481536, "file_name": ".cache\\MatMulNBits_2_0_493.const", "file_size": 6144 }, "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1756487680, "file_name": ".cache\\MatMulNBits_2_0_494.const", "file_size": 18874368 }, "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1775362048, "file_name": ".cache\\MatMulNBits_2_0_495.const", "file_size": 24576 }, "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1775386624, "file_name": ".cache\\MatMulNBits_2_0_496.const", "file_size": 589824 }, "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1775976448, "file_name": ".cache\\MatMulNBits_2_0_497.const", "file_size": 147456 }, "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1776123904, "file_name": ".cache\\MatMulNBits_2_0_498.const", "file_size": 9437184 }, "model.layers.19.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1785561088, "file_name": ".cache\\MatMulNBits_2_0_499.const", "file_size": 12288 }, "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1785573376, "file_name": ".cache\\MatMulNBits_2_0_500.const", "file_size": 294912 }, "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1785868288, "file_name": ".cache\\MatMulNBits_2_0_501.const", "file_size": 73728 }, "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1785942016, "file_name": ".cache\\MatMulNBits_2_0_502.const", "file_size": 9437184 }, "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1795379200, "file_name": ".cache\\MatMulNBits_2_0_503.const", "file_size": 12288 }, "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1795391488, "file_name": ".cache\\MatMulNBits_2_0_504.const", "file_size": 294912 }, "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1795686400, "file_name": ".cache\\MatMulNBits_2_0_505.const", "file_size": 73728 }, "model.layers.19.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1795760128, "file_name": ".cache\\MatMulNBits_2_0_506.const", "file_size": 6144 }, "model.layers.19.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1795766272, "file_name": ".cache\\MatMulNBits_2_0_507.const", "file_size": 12582912 }, "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1808349184, "file_name": ".cache\\MatMulNBits_2_0_508.const", "file_size": 786432 }, "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1809135616, "file_name": ".cache\\MatMulNBits_2_0_509.const", "file_size": 98304 }, "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1809233920, "file_name": ".cache\\MatMulNBits_2_0_510.const", "file_size": 32768 }, "model.layers.19.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1809266688, "file_name": ".cache\\MatMulNBits_2_0_511.const", "file_size": 12582912 }, "model.layers.19.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1821849600, "file_name": ".cache\\MatMulNBits_2_0_512.const", "file_size": 786432 }, "model.layers.19.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1822636032, "file_name": ".cache\\MatMulNBits_2_0_513.const", "file_size": 98304 }, "model.layers.19.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1822734336, "file_name": ".cache\\MatMulNBits_2_0_514.const", "file_size": 32768 }, "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1822767104, "file_name": ".cache\\MatMulNBits_2_0_515.const", "file_size": 25165824 }, "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1847932928, "file_name": ".cache\\MatMulNBits_2_0_516.const", "file_size": 12288 }, "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1847945216, "file_name": ".cache\\MatMulNBits_2_0_517.const", "file_size": 786432 }, "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1848731648, "file_name": ".cache\\MatMulNBits_2_0_518.const", "file_size": 196608 }, "model.layers.20.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1848928256, "file_name": ".cache\\MatMulNBits_2_0_519.const", "file_size": 6144 }, "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1848934400, "file_name": ".cache\\MatMulNBits_2_0_520.const", "file_size": 18874368 }, "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1867808768, "file_name": ".cache\\MatMulNBits_2_0_521.const", "file_size": 24576 }, "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1867833344, "file_name": ".cache\\MatMulNBits_2_0_522.const", "file_size": 589824 }, "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1868423168, "file_name": ".cache\\MatMulNBits_2_0_523.const", "file_size": 147456 }, "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1868570624, "file_name": ".cache\\MatMulNBits_2_0_524.const", "file_size": 9437184 }, "model.layers.20.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1878007808, "file_name": ".cache\\MatMulNBits_2_0_525.const", "file_size": 12288 }, "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1878020096, "file_name": ".cache\\MatMulNBits_2_0_526.const", "file_size": 294912 }, "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1878315008, "file_name": ".cache\\MatMulNBits_2_0_527.const", "file_size": 73728 }, "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1878388736, "file_name": ".cache\\MatMulNBits_2_0_528.const", "file_size": 9437184 }, "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1887825920, "file_name": ".cache\\MatMulNBits_2_0_529.const", "file_size": 12288 }, "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1887838208, "file_name": ".cache\\MatMulNBits_2_0_530.const", "file_size": 294912 }, "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1888133120, "file_name": ".cache\\MatMulNBits_2_0_531.const", "file_size": 73728 }, "model.layers.20.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1888206848, "file_name": ".cache\\MatMulNBits_2_0_532.const", "file_size": 6144 }, "model.layers.20.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1888212992, "file_name": ".cache\\MatMulNBits_2_0_533.const", "file_size": 12582912 }, "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1900795904, "file_name": ".cache\\MatMulNBits_2_0_534.const", "file_size": 786432 }, "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1901582336, "file_name": ".cache\\MatMulNBits_2_0_535.const", "file_size": 98304 }, "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1901680640, "file_name": ".cache\\MatMulNBits_2_0_536.const", "file_size": 32768 }, "model.layers.20.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1901713408, "file_name": ".cache\\MatMulNBits_2_0_537.const", "file_size": 12582912 }, "model.layers.20.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1914296320, "file_name": ".cache\\MatMulNBits_2_0_538.const", "file_size": 786432 }, "model.layers.20.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1915082752, "file_name": ".cache\\MatMulNBits_2_0_539.const", "file_size": 98304 }, "model.layers.20.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1915181056, "file_name": ".cache\\MatMulNBits_2_0_540.const", "file_size": 32768 }, "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1915213824, "file_name": ".cache\\MatMulNBits_2_0_541.const", "file_size": 25165824 }, "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1940379648, "file_name": ".cache\\MatMulNBits_2_0_542.const", "file_size": 12288 }, "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1940391936, "file_name": ".cache\\MatMulNBits_2_0_543.const", "file_size": 786432 }, "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 1941178368, "file_name": ".cache\\MatMulNBits_2_0_544.const", "file_size": 196608 }, "model.layers.21.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1941374976, "file_name": ".cache\\MatMulNBits_2_0_545.const", "file_size": 6144 }, "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 1941381120, "file_name": ".cache\\MatMulNBits_2_0_546.const", "file_size": 18874368 }, "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 1960255488, "file_name": ".cache\\MatMulNBits_2_0_547.const", "file_size": 24576 }, "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 1960280064, "file_name": ".cache\\MatMulNBits_2_0_548.const", "file_size": 589824 }, "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 1960869888, "file_name": ".cache\\MatMulNBits_2_0_549.const", "file_size": 147456 }, "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1961017344, "file_name": ".cache\\MatMulNBits_2_0_550.const", "file_size": 9437184 }, "model.layers.21.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1970454528, "file_name": ".cache\\MatMulNBits_2_0_551.const", "file_size": 12288 }, "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1970466816, "file_name": ".cache\\MatMulNBits_2_0_552.const", "file_size": 294912 }, "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1970761728, "file_name": ".cache\\MatMulNBits_2_0_553.const", "file_size": 73728 }, "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 1970835456, "file_name": ".cache\\MatMulNBits_2_0_554.const", "file_size": 9437184 }, "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 1980272640, "file_name": ".cache\\MatMulNBits_2_0_555.const", "file_size": 12288 }, "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 1980284928, "file_name": ".cache\\MatMulNBits_2_0_556.const", "file_size": 294912 }, "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 1980579840, "file_name": ".cache\\MatMulNBits_2_0_557.const", "file_size": 73728 }, "model.layers.21.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 1980653568, "file_name": ".cache\\MatMulNBits_2_0_558.const", "file_size": 6144 }, "model.layers.21.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1980659712, "file_name": ".cache\\MatMulNBits_2_0_559.const", "file_size": 12582912 }, "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 1993242624, "file_name": ".cache\\MatMulNBits_2_0_560.const", "file_size": 786432 }, "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 1994029056, "file_name": ".cache\\MatMulNBits_2_0_561.const", "file_size": 98304 }, "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 1994127360, "file_name": ".cache\\MatMulNBits_2_0_562.const", "file_size": 32768 }, "model.layers.21.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 1994160128, "file_name": ".cache\\MatMulNBits_2_0_563.const", "file_size": 12582912 }, "model.layers.21.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2006743040, "file_name": ".cache\\MatMulNBits_2_0_564.const", "file_size": 786432 }, "model.layers.21.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2007529472, "file_name": ".cache\\MatMulNBits_2_0_565.const", "file_size": 98304 }, "model.layers.21.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2007627776, "file_name": ".cache\\MatMulNBits_2_0_566.const", "file_size": 32768 }, "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2007660544, "file_name": ".cache\\MatMulNBits_2_0_567.const", "file_size": 25165824 }, "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2032826368, "file_name": ".cache\\MatMulNBits_2_0_568.const", "file_size": 12288 }, "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2032838656, "file_name": ".cache\\MatMulNBits_2_0_569.const", "file_size": 786432 }, "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2033625088, "file_name": ".cache\\MatMulNBits_2_0_570.const", "file_size": 196608 }, "model.layers.22.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2033821696, "file_name": ".cache\\MatMulNBits_2_0_571.const", "file_size": 6144 }, "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2033827840, "file_name": ".cache\\MatMulNBits_2_0_572.const", "file_size": 18874368 }, "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2052702208, "file_name": ".cache\\MatMulNBits_2_0_573.const", "file_size": 24576 }, "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2052726784, "file_name": ".cache\\MatMulNBits_2_0_574.const", "file_size": 589824 }, "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2053316608, "file_name": ".cache\\MatMulNBits_2_0_575.const", "file_size": 147456 }, "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2053464064, "file_name": ".cache\\MatMulNBits_2_0_576.const", "file_size": 9437184 }, "model.layers.22.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2062901248, "file_name": ".cache\\MatMulNBits_2_0_577.const", "file_size": 12288 }, "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2062913536, "file_name": ".cache\\MatMulNBits_2_0_578.const", "file_size": 294912 }, "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2063208448, "file_name": ".cache\\MatMulNBits_2_0_579.const", "file_size": 73728 }, "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2063282176, "file_name": ".cache\\MatMulNBits_2_0_580.const", "file_size": 9437184 }, "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2072719360, "file_name": ".cache\\MatMulNBits_2_0_581.const", "file_size": 12288 }, "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2072731648, "file_name": ".cache\\MatMulNBits_2_0_582.const", "file_size": 294912 }, "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2073026560, "file_name": ".cache\\MatMulNBits_2_0_583.const", "file_size": 73728 }, "model.layers.22.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2073100288, "file_name": ".cache\\MatMulNBits_2_0_584.const", "file_size": 6144 }, "model.layers.22.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2073106432, "file_name": ".cache\\MatMulNBits_2_0_585.const", "file_size": 12582912 }, "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2085689344, "file_name": ".cache\\MatMulNBits_2_0_586.const", "file_size": 786432 }, "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2086475776, "file_name": ".cache\\MatMulNBits_2_0_587.const", "file_size": 98304 }, "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2086574080, "file_name": ".cache\\MatMulNBits_2_0_588.const", "file_size": 32768 }, "model.layers.22.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2086606848, "file_name": ".cache\\MatMulNBits_2_0_589.const", "file_size": 12582912 }, "model.layers.22.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2099189760, "file_name": ".cache\\MatMulNBits_2_0_590.const", "file_size": 786432 }, "model.layers.22.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2099976192, "file_name": ".cache\\MatMulNBits_2_0_591.const", "file_size": 98304 }, "model.layers.22.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2100074496, "file_name": ".cache\\MatMulNBits_2_0_592.const", "file_size": 32768 }, "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2100107264, "file_name": ".cache\\MatMulNBits_2_0_593.const", "file_size": 25165824 }, "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2125273088, "file_name": ".cache\\MatMulNBits_2_0_594.const", "file_size": 12288 }, "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2125285376, "file_name": ".cache\\MatMulNBits_2_0_595.const", "file_size": 786432 }, "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2126071808, "file_name": ".cache\\MatMulNBits_2_0_596.const", "file_size": 196608 }, "model.layers.23.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2126268416, "file_name": ".cache\\MatMulNBits_2_0_597.const", "file_size": 6144 }, "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2126274560, "file_name": ".cache\\MatMulNBits_2_0_598.const", "file_size": 18874368 }, "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2145148928, "file_name": ".cache\\MatMulNBits_2_0_599.const", "file_size": 24576 }, "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2145173504, "file_name": ".cache\\MatMulNBits_2_0_600.const", "file_size": 589824 }, "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2145763328, "file_name": ".cache\\MatMulNBits_2_0_601.const", "file_size": 147456 }, "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2145910784, "file_name": ".cache\\MatMulNBits_2_0_602.const", "file_size": 9437184 }, "model.layers.23.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2155347968, "file_name": ".cache\\MatMulNBits_2_0_603.const", "file_size": 12288 }, "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2155360256, "file_name": ".cache\\MatMulNBits_2_0_604.const", "file_size": 294912 }, "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2155655168, "file_name": ".cache\\MatMulNBits_2_0_605.const", "file_size": 73728 }, "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2155728896, "file_name": ".cache\\MatMulNBits_2_0_606.const", "file_size": 9437184 }, "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2165166080, "file_name": ".cache\\MatMulNBits_2_0_607.const", "file_size": 12288 }, "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2165178368, "file_name": ".cache\\MatMulNBits_2_0_608.const", "file_size": 294912 }, "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2165473280, "file_name": ".cache\\MatMulNBits_2_0_609.const", "file_size": 73728 }, "model.layers.23.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2165547008, "file_name": ".cache\\MatMulNBits_2_0_610.const", "file_size": 6144 }, "model.layers.23.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2165553152, "file_name": ".cache\\MatMulNBits_2_0_611.const", "file_size": 12582912 }, "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2178136064, "file_name": ".cache\\MatMulNBits_2_0_612.const", "file_size": 786432 }, "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2178922496, "file_name": ".cache\\MatMulNBits_2_0_613.const", "file_size": 98304 }, "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2179020800, "file_name": ".cache\\MatMulNBits_2_0_614.const", "file_size": 32768 }, "model.layers.23.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2179053568, "file_name": ".cache\\MatMulNBits_2_0_615.const", "file_size": 12582912 }, "model.layers.23.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2191636480, "file_name": ".cache\\MatMulNBits_2_0_616.const", "file_size": 786432 }, "model.layers.23.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2192422912, "file_name": ".cache\\MatMulNBits_2_0_617.const", "file_size": 98304 }, "model.layers.23.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2192521216, "file_name": ".cache\\MatMulNBits_2_0_618.const", "file_size": 32768 }, "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2192553984, "file_name": ".cache\\MatMulNBits_2_0_619.const", "file_size": 25165824 }, "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2217719808, "file_name": ".cache\\MatMulNBits_2_0_620.const", "file_size": 12288 }, "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2217732096, "file_name": ".cache\\MatMulNBits_2_0_621.const", "file_size": 786432 }, "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2218518528, "file_name": ".cache\\MatMulNBits_2_0_622.const", "file_size": 196608 }, "model.layers.24.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2218715136, "file_name": ".cache\\MatMulNBits_2_0_623.const", "file_size": 6144 }, "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2218721280, "file_name": ".cache\\MatMulNBits_2_0_624.const", "file_size": 18874368 }, "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2237595648, "file_name": ".cache\\MatMulNBits_2_0_625.const", "file_size": 24576 }, "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2237620224, "file_name": ".cache\\MatMulNBits_2_0_626.const", "file_size": 589824 }, "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2238210048, "file_name": ".cache\\MatMulNBits_2_0_627.const", "file_size": 147456 }, "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2238357504, "file_name": ".cache\\MatMulNBits_2_0_628.const", "file_size": 9437184 }, "model.layers.24.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2247794688, "file_name": ".cache\\MatMulNBits_2_0_629.const", "file_size": 12288 }, "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2247806976, "file_name": ".cache\\MatMulNBits_2_0_630.const", "file_size": 294912 }, "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2248101888, "file_name": ".cache\\MatMulNBits_2_0_631.const", "file_size": 73728 }, "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2248175616, "file_name": ".cache\\MatMulNBits_2_0_632.const", "file_size": 9437184 }, "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2257612800, "file_name": ".cache\\MatMulNBits_2_0_633.const", "file_size": 12288 }, "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2257625088, "file_name": ".cache\\MatMulNBits_2_0_634.const", "file_size": 294912 }, "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2257920000, "file_name": ".cache\\MatMulNBits_2_0_635.const", "file_size": 73728 }, "model.layers.24.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2257993728, "file_name": ".cache\\MatMulNBits_2_0_636.const", "file_size": 6144 }, "model.layers.24.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2257999872, "file_name": ".cache\\MatMulNBits_2_0_637.const", "file_size": 12582912 }, "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2270582784, "file_name": ".cache\\MatMulNBits_2_0_638.const", "file_size": 786432 }, "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2271369216, "file_name": ".cache\\MatMulNBits_2_0_639.const", "file_size": 98304 }, "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2271467520, "file_name": ".cache\\MatMulNBits_2_0_640.const", "file_size": 32768 }, "model.layers.24.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2271500288, "file_name": ".cache\\MatMulNBits_2_0_641.const", "file_size": 12582912 }, "model.layers.24.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2284083200, "file_name": ".cache\\MatMulNBits_2_0_642.const", "file_size": 786432 }, "model.layers.24.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2284869632, "file_name": ".cache\\MatMulNBits_2_0_643.const", "file_size": 98304 }, "model.layers.24.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2284967936, "file_name": ".cache\\MatMulNBits_2_0_644.const", "file_size": 32768 }, "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2285000704, "file_name": ".cache\\MatMulNBits_2_0_645.const", "file_size": 25165824 }, "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2310166528, "file_name": ".cache\\MatMulNBits_2_0_646.const", "file_size": 12288 }, "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2310178816, "file_name": ".cache\\MatMulNBits_2_0_647.const", "file_size": 786432 }, "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2310965248, "file_name": ".cache\\MatMulNBits_2_0_648.const", "file_size": 196608 }, "model.layers.25.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2311161856, "file_name": ".cache\\MatMulNBits_2_0_649.const", "file_size": 6144 }, "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2311168000, "file_name": ".cache\\MatMulNBits_2_0_650.const", "file_size": 18874368 }, "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2330042368, "file_name": ".cache\\MatMulNBits_2_0_651.const", "file_size": 24576 }, "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2330066944, "file_name": ".cache\\MatMulNBits_2_0_652.const", "file_size": 589824 }, "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2330656768, "file_name": ".cache\\MatMulNBits_2_0_653.const", "file_size": 147456 }, "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2330804224, "file_name": ".cache\\MatMulNBits_2_0_654.const", "file_size": 9437184 }, "model.layers.25.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2340241408, "file_name": ".cache\\MatMulNBits_2_0_655.const", "file_size": 12288 }, "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2340253696, "file_name": ".cache\\MatMulNBits_2_0_656.const", "file_size": 294912 }, "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2340548608, "file_name": ".cache\\MatMulNBits_2_0_657.const", "file_size": 73728 }, "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2340622336, "file_name": ".cache\\MatMulNBits_2_0_658.const", "file_size": 9437184 }, "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2350059520, "file_name": ".cache\\MatMulNBits_2_0_659.const", "file_size": 12288 }, "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2350071808, "file_name": ".cache\\MatMulNBits_2_0_660.const", "file_size": 294912 }, "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2350366720, "file_name": ".cache\\MatMulNBits_2_0_661.const", "file_size": 73728 }, "model.layers.25.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2350440448, "file_name": ".cache\\MatMulNBits_2_0_662.const", "file_size": 6144 }, "model.layers.25.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2350446592, "file_name": ".cache\\MatMulNBits_2_0_663.const", "file_size": 12582912 }, "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2363029504, "file_name": ".cache\\MatMulNBits_2_0_664.const", "file_size": 786432 }, "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2363815936, "file_name": ".cache\\MatMulNBits_2_0_665.const", "file_size": 98304 }, "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2363914240, "file_name": ".cache\\MatMulNBits_2_0_666.const", "file_size": 32768 }, "model.layers.25.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2363947008, "file_name": ".cache\\MatMulNBits_2_0_667.const", "file_size": 12582912 }, "model.layers.25.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2376529920, "file_name": ".cache\\MatMulNBits_2_0_668.const", "file_size": 786432 }, "model.layers.25.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2377316352, "file_name": ".cache\\MatMulNBits_2_0_669.const", "file_size": 98304 }, "model.layers.25.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2377414656, "file_name": ".cache\\MatMulNBits_2_0_670.const", "file_size": 32768 }, "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2377447424, "file_name": ".cache\\MatMulNBits_2_0_671.const", "file_size": 25165824 }, "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2402613248, "file_name": ".cache\\MatMulNBits_2_0_672.const", "file_size": 12288 }, "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2402625536, "file_name": ".cache\\MatMulNBits_2_0_673.const", "file_size": 786432 }, "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2403411968, "file_name": ".cache\\MatMulNBits_2_0_674.const", "file_size": 196608 }, "model.layers.26.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2403608576, "file_name": ".cache\\MatMulNBits_2_0_675.const", "file_size": 6144 }, "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2403614720, "file_name": ".cache\\MatMulNBits_2_0_676.const", "file_size": 18874368 }, "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2422489088, "file_name": ".cache\\MatMulNBits_2_0_677.const", "file_size": 24576 }, "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2422513664, "file_name": ".cache\\MatMulNBits_2_0_678.const", "file_size": 589824 }, "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2423103488, "file_name": ".cache\\MatMulNBits_2_0_679.const", "file_size": 147456 }, "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2423250944, "file_name": ".cache\\MatMulNBits_2_0_680.const", "file_size": 9437184 }, "model.layers.26.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2432688128, "file_name": ".cache\\MatMulNBits_2_0_681.const", "file_size": 12288 }, "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2432700416, "file_name": ".cache\\MatMulNBits_2_0_682.const", "file_size": 294912 }, "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2432995328, "file_name": ".cache\\MatMulNBits_2_0_683.const", "file_size": 73728 }, "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2433069056, "file_name": ".cache\\MatMulNBits_2_0_684.const", "file_size": 9437184 }, "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2442506240, "file_name": ".cache\\MatMulNBits_2_0_685.const", "file_size": 12288 }, "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2442518528, "file_name": ".cache\\MatMulNBits_2_0_686.const", "file_size": 294912 }, "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2442813440, "file_name": ".cache\\MatMulNBits_2_0_687.const", "file_size": 73728 }, "model.layers.26.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2442887168, "file_name": ".cache\\MatMulNBits_2_0_688.const", "file_size": 6144 }, "model.layers.26.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2442893312, "file_name": ".cache\\MatMulNBits_2_0_689.const", "file_size": 12582912 }, "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2455476224, "file_name": ".cache\\MatMulNBits_2_0_690.const", "file_size": 786432 }, "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2456262656, "file_name": ".cache\\MatMulNBits_2_0_691.const", "file_size": 98304 }, "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2456360960, "file_name": ".cache\\MatMulNBits_2_0_692.const", "file_size": 32768 }, "model.layers.26.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2456393728, "file_name": ".cache\\MatMulNBits_2_0_693.const", "file_size": 12582912 }, "model.layers.26.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2468976640, "file_name": ".cache\\MatMulNBits_2_0_694.const", "file_size": 786432 }, "model.layers.26.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2469763072, "file_name": ".cache\\MatMulNBits_2_0_695.const", "file_size": 98304 }, "model.layers.26.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2469861376, "file_name": ".cache\\MatMulNBits_2_0_696.const", "file_size": 32768 }, "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2469894144, "file_name": ".cache\\MatMulNBits_2_0_697.const", "file_size": 25165824 }, "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2495059968, "file_name": ".cache\\MatMulNBits_2_0_698.const", "file_size": 12288 }, "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2495072256, "file_name": ".cache\\MatMulNBits_2_0_699.const", "file_size": 786432 }, "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2495858688, "file_name": ".cache\\MatMulNBits_2_0_700.const", "file_size": 196608 }, "model.layers.27.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2496055296, "file_name": ".cache\\MatMulNBits_2_0_701.const", "file_size": 6144 }, "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2496061440, "file_name": ".cache\\MatMulNBits_2_0_702.const", "file_size": 18874368 }, "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2514935808, "file_name": ".cache\\MatMulNBits_2_0_703.const", "file_size": 24576 }, "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2514960384, "file_name": ".cache\\MatMulNBits_2_0_704.const", "file_size": 589824 }, "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2515550208, "file_name": ".cache\\MatMulNBits_2_0_705.const", "file_size": 147456 }, "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2515697664, "file_name": ".cache\\MatMulNBits_2_0_706.const", "file_size": 9437184 }, "model.layers.27.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2525134848, "file_name": ".cache\\MatMulNBits_2_0_707.const", "file_size": 12288 }, "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2525147136, "file_name": ".cache\\MatMulNBits_2_0_708.const", "file_size": 294912 }, "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2525442048, "file_name": ".cache\\MatMulNBits_2_0_709.const", "file_size": 73728 }, "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2525515776, "file_name": ".cache\\MatMulNBits_2_0_710.const", "file_size": 9437184 }, "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2534952960, "file_name": ".cache\\MatMulNBits_2_0_711.const", "file_size": 12288 }, "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2534965248, "file_name": ".cache\\MatMulNBits_2_0_712.const", "file_size": 294912 }, "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2535260160, "file_name": ".cache\\MatMulNBits_2_0_713.const", "file_size": 73728 }, "model.layers.27.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2535333888, "file_name": ".cache\\MatMulNBits_2_0_714.const", "file_size": 6144 }, "model.layers.27.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2535340032, "file_name": ".cache\\MatMulNBits_2_0_715.const", "file_size": 12582912 }, "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2547922944, "file_name": ".cache\\MatMulNBits_2_0_716.const", "file_size": 786432 }, "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2548709376, "file_name": ".cache\\MatMulNBits_2_0_717.const", "file_size": 98304 }, "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2548807680, "file_name": ".cache\\MatMulNBits_2_0_718.const", "file_size": 32768 }, "model.layers.27.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2548840448, "file_name": ".cache\\MatMulNBits_2_0_719.const", "file_size": 12582912 }, "model.layers.27.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2561423360, "file_name": ".cache\\MatMulNBits_2_0_720.const", "file_size": 786432 }, "model.layers.27.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2562209792, "file_name": ".cache\\MatMulNBits_2_0_721.const", "file_size": 98304 }, "model.layers.27.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2562308096, "file_name": ".cache\\MatMulNBits_2_0_722.const", "file_size": 32768 }, "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2562340864, "file_name": ".cache\\MatMulNBits_2_0_723.const", "file_size": 25165824 }, "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2587506688, "file_name": ".cache\\MatMulNBits_2_0_724.const", "file_size": 12288 }, "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2587518976, "file_name": ".cache\\MatMulNBits_2_0_725.const", "file_size": 786432 }, "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2588305408, "file_name": ".cache\\MatMulNBits_2_0_726.const", "file_size": 196608 }, "model.layers.28.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2588502016, "file_name": ".cache\\MatMulNBits_2_0_727.const", "file_size": 6144 }, "model.layers.28.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2588508160, "file_name": ".cache\\MatMulNBits_2_0_728.const", "file_size": 18874368 }, "model.layers.28.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2607382528, "file_name": ".cache\\MatMulNBits_2_0_729.const", "file_size": 24576 }, "model.layers.28.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2607407104, "file_name": ".cache\\MatMulNBits_2_0_730.const", "file_size": 589824 }, "model.layers.28.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2607996928, "file_name": ".cache\\MatMulNBits_2_0_731.const", "file_size": 147456 }, "model.layers.28.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2608144384, "file_name": ".cache\\MatMulNBits_2_0_732.const", "file_size": 9437184 }, "model.layers.28.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2617581568, "file_name": ".cache\\MatMulNBits_2_0_733.const", "file_size": 12288 }, "model.layers.28.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2617593856, "file_name": ".cache\\MatMulNBits_2_0_734.const", "file_size": 294912 }, "model.layers.28.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2617888768, "file_name": ".cache\\MatMulNBits_2_0_735.const", "file_size": 73728 }, "model.layers.28.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2617962496, "file_name": ".cache\\MatMulNBits_2_0_736.const", "file_size": 9437184 }, "model.layers.28.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2627399680, "file_name": ".cache\\MatMulNBits_2_0_737.const", "file_size": 12288 }, "model.layers.28.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2627411968, "file_name": ".cache\\MatMulNBits_2_0_738.const", "file_size": 294912 }, "model.layers.28.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2627706880, "file_name": ".cache\\MatMulNBits_2_0_739.const", "file_size": 73728 }, "model.layers.28.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2627780608, "file_name": ".cache\\MatMulNBits_2_0_740.const", "file_size": 6144 }, "model.layers.28.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2627786752, "file_name": ".cache\\MatMulNBits_2_0_741.const", "file_size": 12582912 }, "model.layers.28.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2640369664, "file_name": ".cache\\MatMulNBits_2_0_742.const", "file_size": 786432 }, "model.layers.28.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2641156096, "file_name": ".cache\\MatMulNBits_2_0_743.const", "file_size": 98304 }, "model.layers.28.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2641254400, "file_name": ".cache\\MatMulNBits_2_0_744.const", "file_size": 32768 }, "model.layers.28.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2641287168, "file_name": ".cache\\MatMulNBits_2_0_745.const", "file_size": 12582912 }, "model.layers.28.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2653870080, "file_name": ".cache\\MatMulNBits_2_0_746.const", "file_size": 786432 }, "model.layers.28.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2654656512, "file_name": ".cache\\MatMulNBits_2_0_747.const", "file_size": 98304 }, "model.layers.28.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2654754816, "file_name": ".cache\\MatMulNBits_2_0_748.const", "file_size": 32768 }, "model.layers.28.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2654787584, "file_name": ".cache\\MatMulNBits_2_0_749.const", "file_size": 25165824 }, "model.layers.28.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2679953408, "file_name": ".cache\\MatMulNBits_2_0_750.const", "file_size": 12288 }, "model.layers.28.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2679965696, "file_name": ".cache\\MatMulNBits_2_0_751.const", "file_size": 786432 }, "model.layers.28.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2680752128, "file_name": ".cache\\MatMulNBits_2_0_752.const", "file_size": 196608 }, "model.layers.29.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2680948736, "file_name": ".cache\\MatMulNBits_2_0_753.const", "file_size": 6144 }, "model.layers.29.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2680954880, "file_name": ".cache\\MatMulNBits_2_0_754.const", "file_size": 18874368 }, "model.layers.29.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2699829248, "file_name": ".cache\\MatMulNBits_2_0_755.const", "file_size": 24576 }, "model.layers.29.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2699853824, "file_name": ".cache\\MatMulNBits_2_0_756.const", "file_size": 589824 }, "model.layers.29.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2700443648, "file_name": ".cache\\MatMulNBits_2_0_757.const", "file_size": 147456 }, "model.layers.29.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2700591104, "file_name": ".cache\\MatMulNBits_2_0_758.const", "file_size": 9437184 }, "model.layers.29.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2710028288, "file_name": ".cache\\MatMulNBits_2_0_759.const", "file_size": 12288 }, "model.layers.29.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2710040576, "file_name": ".cache\\MatMulNBits_2_0_760.const", "file_size": 294912 }, "model.layers.29.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2710335488, "file_name": ".cache\\MatMulNBits_2_0_761.const", "file_size": 73728 }, "model.layers.29.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2710409216, "file_name": ".cache\\MatMulNBits_2_0_762.const", "file_size": 9437184 }, "model.layers.29.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2719846400, "file_name": ".cache\\MatMulNBits_2_0_763.const", "file_size": 12288 }, "model.layers.29.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2719858688, "file_name": ".cache\\MatMulNBits_2_0_764.const", "file_size": 294912 }, "model.layers.29.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2720153600, "file_name": ".cache\\MatMulNBits_2_0_765.const", "file_size": 73728 }, "model.layers.29.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2720227328, "file_name": ".cache\\MatMulNBits_2_0_766.const", "file_size": 6144 }, "model.layers.29.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2720233472, "file_name": ".cache\\MatMulNBits_2_0_767.const", "file_size": 12582912 }, "model.layers.29.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2732816384, "file_name": ".cache\\MatMulNBits_2_0_768.const", "file_size": 786432 }, "model.layers.29.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2733602816, "file_name": ".cache\\MatMulNBits_2_0_769.const", "file_size": 98304 }, "model.layers.29.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2733701120, "file_name": ".cache\\MatMulNBits_2_0_770.const", "file_size": 32768 }, "model.layers.29.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2733733888, "file_name": ".cache\\MatMulNBits_2_0_771.const", "file_size": 12582912 }, "model.layers.29.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2746316800, "file_name": ".cache\\MatMulNBits_2_0_772.const", "file_size": 786432 }, "model.layers.29.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2747103232, "file_name": ".cache\\MatMulNBits_2_0_773.const", "file_size": 98304 }, "model.layers.29.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2747201536, "file_name": ".cache\\MatMulNBits_2_0_774.const", "file_size": 32768 }, "model.layers.29.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2747234304, "file_name": ".cache\\MatMulNBits_2_0_775.const", "file_size": 25165824 }, "model.layers.29.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2772400128, "file_name": ".cache\\MatMulNBits_2_0_776.const", "file_size": 12288 }, "model.layers.29.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2772412416, "file_name": ".cache\\MatMulNBits_2_0_777.const", "file_size": 786432 }, "model.layers.29.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2773198848, "file_name": ".cache\\MatMulNBits_2_0_778.const", "file_size": 196608 }, "model.layers.30.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2773395456, "file_name": ".cache\\MatMulNBits_2_0_779.const", "file_size": 6144 }, "model.layers.30.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2773401600, "file_name": ".cache\\MatMulNBits_2_0_780.const", "file_size": 18874368 }, "model.layers.30.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2792275968, "file_name": ".cache\\MatMulNBits_2_0_781.const", "file_size": 24576 }, "model.layers.30.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2792300544, "file_name": ".cache\\MatMulNBits_2_0_782.const", "file_size": 589824 }, "model.layers.30.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2792890368, "file_name": ".cache\\MatMulNBits_2_0_783.const", "file_size": 147456 }, "model.layers.30.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2793037824, "file_name": ".cache\\MatMulNBits_2_0_784.const", "file_size": 9437184 }, "model.layers.30.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2802475008, "file_name": ".cache\\MatMulNBits_2_0_785.const", "file_size": 12288 }, "model.layers.30.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2802487296, "file_name": ".cache\\MatMulNBits_2_0_786.const", "file_size": 294912 }, "model.layers.30.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2802782208, "file_name": ".cache\\MatMulNBits_2_0_787.const", "file_size": 73728 }, "model.layers.30.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2802855936, "file_name": ".cache\\MatMulNBits_2_0_788.const", "file_size": 9437184 }, "model.layers.30.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2812293120, "file_name": ".cache\\MatMulNBits_2_0_789.const", "file_size": 12288 }, "model.layers.30.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2812305408, "file_name": ".cache\\MatMulNBits_2_0_790.const", "file_size": 294912 }, "model.layers.30.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2812600320, "file_name": ".cache\\MatMulNBits_2_0_791.const", "file_size": 73728 }, "model.layers.30.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2812674048, "file_name": ".cache\\MatMulNBits_2_0_792.const", "file_size": 6144 }, "model.layers.30.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2812680192, "file_name": ".cache\\MatMulNBits_2_0_793.const", "file_size": 12582912 }, "model.layers.30.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2825263104, "file_name": ".cache\\MatMulNBits_2_0_794.const", "file_size": 786432 }, "model.layers.30.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2826049536, "file_name": ".cache\\MatMulNBits_2_0_795.const", "file_size": 98304 }, "model.layers.30.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2826147840, "file_name": ".cache\\MatMulNBits_2_0_796.const", "file_size": 32768 }, "model.layers.30.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2826180608, "file_name": ".cache\\MatMulNBits_2_0_797.const", "file_size": 12582912 }, "model.layers.30.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2838763520, "file_name": ".cache\\MatMulNBits_2_0_798.const", "file_size": 786432 }, "model.layers.30.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2839549952, "file_name": ".cache\\MatMulNBits_2_0_799.const", "file_size": 98304 }, "model.layers.30.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2839648256, "file_name": ".cache\\MatMulNBits_2_0_800.const", "file_size": 32768 }, "model.layers.30.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2839681024, "file_name": ".cache\\MatMulNBits_2_0_801.const", "file_size": 25165824 }, "model.layers.30.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2864846848, "file_name": ".cache\\MatMulNBits_2_0_802.const", "file_size": 12288 }, "model.layers.30.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2864859136, "file_name": ".cache\\MatMulNBits_2_0_803.const", "file_size": 786432 }, "model.layers.30.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2865645568, "file_name": ".cache\\MatMulNBits_2_0_804.const", "file_size": 196608 }, "model.layers.31.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2865842176, "file_name": ".cache\\MatMulNBits_2_0_805.const", "file_size": 6144 }, "model.layers.31.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 6144 ], "size_in_bytes": 18874368, "op_tensor_size": 18874368, "offset": 2865848320, "file_name": ".cache\\MatMulNBits_2_0_806.const", "file_size": 18874368 }, "model.layers.31.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 6144 ], "size_in_bytes": 24576, "op_tensor_size": 24576, "offset": 2884722688, "file_name": ".cache\\MatMulNBits_2_0_807.const", "file_size": 24576 }, "model.layers.31.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 147456 ], "size_in_bytes": 589824, "op_tensor_size": 589824, "offset": 2884747264, "file_name": ".cache\\MatMulNBits_2_0_808.const", "file_size": 589824 }, "model.layers.31.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 147456 ], "size_in_bytes": 147456, "op_tensor_size": 147456, "offset": 2885337088, "file_name": ".cache\\MatMulNBits_2_0_809.const", "file_size": 147456 }, "model.layers.31.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2885484544, "file_name": ".cache\\MatMulNBits_2_0_810.const", "file_size": 9437184 }, "model.layers.31.attn.v_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2894921728, "file_name": ".cache\\MatMulNBits_2_0_811.const", "file_size": 12288 }, "model.layers.31.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2894934016, "file_name": ".cache\\MatMulNBits_2_0_812.const", "file_size": 294912 }, "model.layers.31.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2895228928, "file_name": ".cache\\MatMulNBits_2_0_813.const", "file_size": 73728 }, "model.layers.31.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 3072 ], "size_in_bytes": 9437184, "op_tensor_size": 9437184, "offset": 2895302656, "file_name": ".cache\\MatMulNBits_2_0_814.const", "file_size": 9437184 }, "model.layers.31.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2904739840, "file_name": ".cache\\MatMulNBits_2_0_815.const", "file_size": 12288 }, "model.layers.31.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 73728 ], "size_in_bytes": 294912, "op_tensor_size": 294912, "offset": 2904752128, "file_name": ".cache\\MatMulNBits_2_0_816.const", "file_size": 294912 }, "model.layers.31.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 73728 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 2905047040, "file_name": ".cache\\MatMulNBits_2_0_817.const", "file_size": 73728 }, "model.layers.31.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2905120768, "file_name": ".cache\\MatMulNBits_2_0_818.const", "file_size": 6144 }, "model.layers.31.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2905126912, "file_name": ".cache\\MatMulNBits_2_0_819.const", "file_size": 12582912 }, "model.layers.31.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2917709824, "file_name": ".cache\\MatMulNBits_2_0_820.const", "file_size": 786432 }, "model.layers.31.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2918496256, "file_name": ".cache\\MatMulNBits_2_0_821.const", "file_size": 98304 }, "model.layers.31.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2918594560, "file_name": ".cache\\MatMulNBits_2_0_822.const", "file_size": 32768 }, "model.layers.31.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8192, 24, 64 ], "size_in_bytes": 12582912, "op_tensor_size": 12582912, "offset": 2918627328, "file_name": ".cache\\MatMulNBits_2_0_823.const", "file_size": 12582912 }, "model.layers.31.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2931210240, "file_name": ".cache\\MatMulNBits_2_0_824.const", "file_size": 786432 }, "model.layers.31.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 98304 ], "size_in_bytes": 98304, "op_tensor_size": 98304, "offset": 2931996672, "file_name": ".cache\\MatMulNBits_2_0_825.const", "file_size": 98304 }, "model.layers.31.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8192 ], "size_in_bytes": 32768, "op_tensor_size": 32768, "offset": 2932094976, "file_name": ".cache\\MatMulNBits_2_0_826.const", "file_size": 32768 }, "model.layers.31.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8192, 3072 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 2932127744, "file_name": ".cache\\MatMulNBits_2_0_827.const", "file_size": 25165824 }, "model.layers.31.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 2957293568, "file_name": ".cache\\MatMulNBits_2_0_828.const", "file_size": 12288 }, "model.layers.31.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 196608 ], "size_in_bytes": 786432, "op_tensor_size": 786432, "offset": 2957305856, "file_name": ".cache\\MatMulNBits_2_0_829.const", "file_size": 786432 }, "model.layers.31.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 196608 ], "size_in_bytes": 196608, "op_tensor_size": 196608, "offset": 2958092288, "file_name": ".cache\\MatMulNBits_2_0_830.const", "file_size": 196608 }, "model.layers.32.final_norm_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 3072 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 2958288896, "file_name": ".cache\\MatMulNBits_2_0_831.const", "file_size": 6144 }, "lm_head.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072, 32064 ], "size_in_bytes": 98500608, "op_tensor_size": 98500608, "offset": 2958295040, "file_name": ".cache\\MatMulNBits_2_0_832.const", "file_size": 98500608 }, "lm_head.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 32064 ], "size_in_bytes": 128256, "op_tensor_size": 128256, "offset": 3056795648, "file_name": ".cache\\MatMulNBits_2_0_833.const", "file_size": 128256 }, "lm_head.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 769536 ], "size_in_bytes": 3078144, "op_tensor_size": 3078144, "offset": 3056923904, "file_name": ".cache\\MatMulNBits_2_0_834.const", "file_size": 3078144 }, "lm_head.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 769536 ], "size_in_bytes": 769536, "op_tensor_size": 769536, "offset": 3060002048, "file_name": ".cache\\MatMulNBits_2_0_835.const", "file_size": 769536 }, "past_key_values.0.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 0 }, "past_key_values.0.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 25165824 }, "present.0.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 0 }, "present.0.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 25165824 }, "past_key_values.1.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 50331648 }, "past_key_values.1.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 75497472 }, "present.1.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 50331648 }, "present.1.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 75497472 }, "past_key_values.2.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 100663296 }, "past_key_values.2.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 125829120 }, "present.2.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 100663296 }, "present.2.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 125829120 }, "past_key_values.3.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 150994944 }, "past_key_values.3.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 176160768 }, "present.3.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 150994944 }, "present.3.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 176160768 }, "past_key_values.4.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 201326592 }, "past_key_values.4.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 226492416 }, "present.4.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 201326592 }, "present.4.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 226492416 }, "past_key_values.5.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 251658240 }, "past_key_values.5.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 276824064 }, "present.5.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 251658240 }, "present.5.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 276824064 }, "past_key_values.6.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 301989888 }, "past_key_values.6.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 327155712 }, "present.6.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 301989888 }, "present.6.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 327155712 }, "past_key_values.7.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 352321536 }, "past_key_values.7.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 377487360 }, "present.7.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 352321536 }, "present.7.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 377487360 }, "past_key_values.8.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 402653184 }, "past_key_values.8.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 427819008 }, "present.8.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 402653184 }, "present.8.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 427819008 }, "past_key_values.9.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 452984832 }, "past_key_values.9.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 478150656 }, "present.9.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 452984832 }, "present.9.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 478150656 }, "past_key_values.10.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 503316480 }, "past_key_values.10.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 528482304 }, "present.10.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 503316480 }, "present.10.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 528482304 }, "past_key_values.11.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 553648128 }, "past_key_values.11.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 578813952 }, "present.11.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 553648128 }, "present.11.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 578813952 }, "past_key_values.12.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 603979776 }, "past_key_values.12.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 629145600 }, "present.12.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 603979776 }, "present.12.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 629145600 }, "past_key_values.13.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 654311424 }, "past_key_values.13.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 679477248 }, "present.13.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 654311424 }, "present.13.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 679477248 }, "past_key_values.14.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 704643072 }, "past_key_values.14.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 729808896 }, "present.14.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 704643072 }, "present.14.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 729808896 }, "past_key_values.15.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 754974720 }, "past_key_values.15.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 780140544 }, "present.15.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 754974720 }, "present.15.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 780140544 }, "past_key_values.16.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 805306368 }, "past_key_values.16.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 830472192 }, "present.16.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 805306368 }, "present.16.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 830472192 }, "past_key_values.17.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 855638016 }, "past_key_values.17.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 880803840 }, "present.17.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 855638016 }, "present.17.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 880803840 }, "past_key_values.18.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 905969664 }, "past_key_values.18.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 931135488 }, "present.18.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 905969664 }, "present.18.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 931135488 }, "past_key_values.19.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 956301312 }, "past_key_values.19.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 981467136 }, "present.19.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 956301312 }, "present.19.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 981467136 }, "past_key_values.20.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1006632960 }, "past_key_values.20.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1031798784 }, "present.20.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1006632960 }, "present.20.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1031798784 }, "past_key_values.21.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1056964608 }, "past_key_values.21.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1082130432 }, "present.21.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1056964608 }, "present.21.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1082130432 }, "past_key_values.22.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1107296256 }, "past_key_values.22.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1132462080 }, "present.22.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1107296256 }, "present.22.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1132462080 }, "past_key_values.23.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1157627904 }, "past_key_values.23.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1182793728 }, "present.23.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1157627904 }, "present.23.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1182793728 }, "past_key_values.24.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1207959552 }, "past_key_values.24.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1233125376 }, "present.24.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1207959552 }, "present.24.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1233125376 }, "past_key_values.25.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1258291200 }, "past_key_values.25.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1283457024 }, "present.25.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1258291200 }, "present.25.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1283457024 }, "past_key_values.26.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1308622848 }, "past_key_values.26.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1333788672 }, "present.26.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1308622848 }, "present.26.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1333788672 }, "past_key_values.27.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1358954496 }, "past_key_values.27.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1384120320 }, "present.27.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1358954496 }, "present.27.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1384120320 }, "past_key_values.28.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1409286144 }, "past_key_values.28.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1434451968 }, "present.28.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1409286144 }, "present.28.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1434451968 }, "past_key_values.29.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1459617792 }, "past_key_values.29.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1484783616 }, "present.29.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1459617792 }, "present.29.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1484783616 }, "past_key_values.30.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1509949440 }, "past_key_values.30.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1535115264 }, "present.30.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1509949440 }, "present.30.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1535115264 }, "past_key_values.31.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1560281088 }, "past_key_values.31.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1585446912 }, "present.31.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1560281088 }, "present.31.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 32, 4096, 96 ], "size_in_bytes": 25165824, "op_tensor_size": 25165824, "offset": 1585446912 }, "sin_cos_cache_token": { "packed_buffer_label": "ext_buf_1", "xrt_arg_id": 6, "dtype": "bfloat16", "shape": [ 135168, 96 ], "size_in_bytes": 25952256, "op_tensor_size": 25952256, "offset": 0 } }, "aux_info": { "is_llm": true } }